/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
 * Copyright (C) 2007 - 2008 Novell, Inc.
 * Copyright (C) 2007 - 2018 Red Hat, Inc.
 */

#include "libnm-client-impl/nm-default-libnm.h"

#include "nm-client.h"

#include <libudev.h>

#include "libnm-std-aux/c-list-util.h"
#include "libnm-glib-aux/nm-c-list.h"
#include "libnm-glib-aux/nm-dbus-aux.h"
#include "libnm-core-aux-intern/nm-common-macros.h"
#include "nm-access-point.h"
#include "nm-active-connection.h"
#include "nm-checkpoint.h"
#include "libnm-core-intern/nm-core-internal.h"

libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectManagerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data (a sketch of this direct approach follows the CHANGES list below).
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with a large number of D-Bus
objects. The patch tries hard to make the implementation scale well. Of
course, when we cache N objects that reference each other, we are still
fundamentally O(N*M) in runtime and memory consumption (with M being the
number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each NMClient is only accessed through the caller's
GMainContext. This follows glib's style and effectively allows using
NMClient in a multi-threaded scenario (see the sketch after this list).
This implies sticking to a main context upon construction and ensuring
that callbacks are only invoked when iterating that context. Also,
NMClient itself shall never iterate the caller's context. This also means
libnm must never use g_idle_add() or g_timeout_add(), as those enqueue
sources in the g_main_context_default() context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong; signals must be emitted in the right order,
when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receives a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted at a
moment when the cache is consistent. The unavoidable downside is that when
you receive a property-changed signal, possibly many other properties have
already changed and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from the client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should all be for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also, previously we constructed NMPropertiesInfo
and had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other metadata are static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them, because misuse won't compile.
- The metadata now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Hence these are not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and from the "connection-added"/"connection-removed"
signals (see the sketch after this list).
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
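
Below are a few minimal sketches illustrating the points above. They are not
code from this patch; function and variable names are illustrative, error
handling is trimmed, and only public GLib/libnm API is used.

First, for the direct-GDBusConnection approach: watching NetworkManager's
ObjectManager interface (exported at "/org/freedesktop") without
GDBusObjectManagerClient:
```
#include <gio/gio.h>

/* Handler for "InterfacesAdded" of org.freedesktop.DBus.ObjectManager. */
static void
on_interfaces_added(GDBusConnection *connection,
                    const char      *sender_name,
                    const char      *object_path,
                    const char      *interface_name,
                    const char      *signal_name,
                    GVariant        *parameters,
                    gpointer         user_data)
{
    const char *path;
    GVariant   *ifaces;

    /* InterfacesAdded carries (object path, dict of interfaces/properties). */
    g_variant_get(parameters, "(&o@a{sa{sv}})", &path, &ifaces);
    g_print("object added: %s\n", path);
    g_variant_unref(ifaces);
}

static guint
watch_objects(GDBusConnection *connection)
{
    return g_dbus_connection_signal_subscribe(connection,
                                              "org.freedesktop.NetworkManager",
                                              "org.freedesktop.DBus.ObjectManager",
                                              "InterfacesAdded",
                                              "/org/freedesktop",
                                              NULL,
                                              G_DBUS_SIGNAL_FLAGS_NONE,
                                              on_interfaces_added,
                                              NULL,
                                              NULL);
}
```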
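
Second, for the GMainContext behavior: a caller who wants callbacks on a
private context pushes it as thread-default before constructing the NMClient,
and then iterates that context itself:
```
#include <NetworkManager.h>

static void
run_client_on_private_context(void)
{
    GMainContext *context = g_main_context_new();
    NMClient     *client;
    GError       *error = NULL;

    /* NMClient sticks to the thread-default main context that is
     * current at construction time. */
    g_main_context_push_thread_default(context);

    client = nm_client_new(NULL, &error);
    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_clear_error(&error);
    } else {
        GMainLoop *loop = g_main_loop_new(context, FALSE);

        /* ... connect signals, arrange for g_main_loop_quit(), then
         * iterate our own context; callbacks only fire here. */
        g_main_loop_run(loop);
        g_main_loop_unref(loop);
        g_object_unref(client);
    }

    g_main_context_pop_thread_default(context);
    g_main_context_unref(context);
}
```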
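
Third, the consumer-visible effect of the new signal order: by the time
"device-added" is emitted, the devices property was already notified, so the
device list returned inside the handler already contains the new device:
```
static void
on_device_added(NMClient *client, NMDevice *device, gpointer user_data)
{
    /* The devices property was already notified, so @device is
     * already contained in this list. */
    const GPtrArray *devices = nm_client_get_devices(client);

    g_print("added %s (now %u devices)\n",
            nm_device_get_iface(device),
            devices->len);
}

/* connect with:
 *   g_signal_connect(client, NM_CLIENT_DEVICE_ADDED,
 *                    G_CALLBACK(on_device_added), NULL);
 */
```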
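
Finally, for profile visibility: listing connections through the client only
yields the profiles that NMClient exposes; profiles hidden via
"connection.permissions" simply never show up here:
```
static void
print_visible_connections(NMClient *client)
{
    const GPtrArray *connections = nm_client_get_connections(client);
    guint            i;

    for (i = 0; i < connections->len; i++) {
        NMRemoteConnection *remote = g_ptr_array_index(connections, i);

        /* NMRemoteConnection implements the NMConnection interface. */
        g_print("%s\n", nm_connection_get_id(NM_CONNECTION(remote)));
    }
}
```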
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor

#include "nm-dbus-helpers.h"
#include "nm-wifi-p2p-peer.h"
#include "nm-device-6lowpan.h"
#include "nm-device-adsl.h"
#include "nm-device-bond.h"
#include "nm-device-bridge.h"
#include "nm-device-bt.h"
#include "nm-device-dummy.h"
#include "nm-device-ethernet.h"
#include "nm-device-generic.h"
#include "nm-device-geneve.h"
#include "nm-device-hsr.h"
#include "nm-device-infiniband.h"
#include "nm-device-ip-tunnel.h"
#include "nm-device-ipvlan.h"
#include "nm-device-loopback.h"
#include "nm-device-macsec.h"
#include "nm-device-macvlan.h"
#include "nm-device-modem.h"
#include "nm-device-ovs-bridge.h"
#include "nm-device-ovs-interface.h"
#include "nm-device-ovs-port.h"
#include "nm-device-ppp.h"
#include "nm-device-team.h"
#include "nm-device-tun.h"
#include "nm-device-vlan.h"
#include "nm-device-vxlan.h"
#include "nm-device-wifi-p2p.h"
#include "nm-device-wifi.h"
#include "nm-device-wireguard.h"
#include "nm-device-wpan.h"
#include "nm-device-olpc-mesh.h"
#include "nm-dhcp-config.h"
#include "nm-dhcp4-config.h"
#include "nm-dhcp6-config.h"
#include "nm-dns-manager.h"
#include "nm-ip4-config.h"
#include "nm-ip6-config.h"
#include "nm-object-private.h"
#include "nm-remote-connection.h"
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long a plain `nmcli` invocation takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
#include "nm-utils.h"
#include "nm-setting-ethtool.h"
#include "nm-vpn-connection.h"

/*****************************************************************************/

NM_CACHED_QUARK_FCN("nm-context-busy-watcher", nm_context_busy_watcher_quark);
libnm: fix leaking internal GMainContext for synchronously initialized NMClient
NMClient makes asynchronous D-Bus calls via g_dbus_connection_call().
This references the current GMainContext to later invoke the
asynchronous callback. Even when we cancel the asynchronous call,
the callback will still be invoked (later) to complete the request.
In particular this means when we destroy (unref) an NMClient, there
are quite possibly pending requests in the GMainContext. Although they
are cancelled, they keep the GMainContext alive.
With synchronous initialization, we have an internal GMainContext.
When we destroy the NMClient, we cannot just unhook the integrated
source, instead, we need to keep it integrated in the caller's main
context, as long as there are pending requests.
Add a mechanism to track those pending requests and fix the leak for the
internal GMainContext. Also expose the same mechanism to the user via a new
API called nm_client_get_context_busy_watcher(). This allows the user
to know when it can stop iterating the main context and when all
resources are reclaimed.
For example the following will lead to a crash:
    for i in range(1,2000):
        nmc = NM.Client.new(None)
This creates a number of NMClient instances and destroys them again.
Note that here the GMainContext is never iterated, because
synchronous initialization does not iterate the caller's context. So,
while we correctly unref and dispose the created NMClient instances,
there are pending requests left in the inner GMainContext. These pile
up and soon the program will crash because it runs out of file descriptors.
We can have a similar problem with asynchronous initialization, when
we create a new GMainContext per client, and don't iterate it after
we are done with the client.
Note that this patch does not avoid the problem in general. The problem
cannot be avoided; the user must iterate the main context at some point.
Otherwise, resources (memory and file descriptors) will be leaked.
Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/merge_requests/347
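For illustration (a sketch, not code from this patch; it assumes the client
was created on the default main context), waiting until all resources are
reclaimed:
```
#include <NetworkManager.h>

static void
create_and_destroy_client(void)
{
    NMClient *client;
    gpointer  watcher;

    client = nm_client_new(NULL, NULL);
    if (!client)
        return;

    /* Track the context-busy-watcher with a weak pointer. The pointer is
     * reset to NULL once all pending requests completed. */
    watcher = nm_client_get_context_busy_watcher(client);
    g_object_add_weak_pointer(G_OBJECT(watcher), &watcher);

    g_object_unref(client);

    /* Iterate the main context until the (cancelled) requests returned,
     * so that their memory and file descriptors get reclaimed. */
    while (watcher)
        g_main_context_iteration(NULL, TRUE);
}
```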
static void
_context_busy_watcher_attach_integration_source_cb(gpointer data, GObject *where_the_object_was)
{
    nm_g_source_destroy_and_unref(data);
}
void
nm_context_busy_watcher_integrate_source(GMainContext *outer_context,
                                         GMainContext *inner_context,
                                         GObject      *context_busy_watcher)
{
    GSource *source;

    nm_assert(outer_context);
    nm_assert(inner_context);
    nm_assert(outer_context != inner_context);
    nm_assert(G_IS_OBJECT(context_busy_watcher));

    source = nm_utils_g_main_context_create_integrate_source(inner_context);
    g_source_attach(source, outer_context);
    /* The problem is...
     *
     * NMClient is associated with a GMainContext, and its underlying GDBusConnection
     * likewise queues signals and callbacks on that main context. During operation, NMClient
     * will schedule async operations which will return asynchronously on that GMainContext.
     *
     * Note that depending on whether NMClient got initialized synchronously or asynchronously,
     * it has an internal priv->dbus_context that is different from the outer priv->main_context.
     * However, the problem exists in both cases.
     *
     * So, as long as there are pending D-Bus calls, the GMainContext is referenced and kept alive.
     * When NMClient gets destroyed, the pending calls get cancelled, but the async callbacks are
     * still scheduled to return.
     * That means the main context stays alive until it gets iterated long enough so that all pending
     * operations are completed.
     *
     * Note that pending operations don't keep NMClient alive, so NMClient can already be gone by
     * then, but the user should still iterate the main context long enough to process the (cancelled)
     * callbacks... at least, if the user cares about whether the remaining memory and file descriptors
     * of the GMainContext can be reclaimed.
     *
     * In hindsight, maybe pending references should have kept NMClient alive. But then NMClient would
     * need a special "shutdown()" API that the user must invoke, because unrefing would no longer
     * be enough to ensure a shutdown (imagine a situation where NMClient receives a constant flow
     * of "CheckPermissions" signals, which keeps retriggering an async request). Anyway, we cannot
     * add such a shutdown API now, as it would break clients' expectations that they can just unref
     * the NMClient to destroy it.
     *
     * So, we allow NMClient to be unreffed, but the user is advised to keep iterating the main context.
     * But for how long? This is where nm_client_get_context_busy_watcher() comes into play. The user may
     * subscribe a weak pointer to that instance and should keep iterating as long as the object
     * exists.
     *
     * Now, back to synchronous initialization: here we have the internal priv->dbus_context.
     * We also cannot remove that context right away; instead, we need to keep it integrated in the
     * caller's priv->main_context as long as we have pending calls: that is, as long as the
     * context-busy-watcher is alive.
     */
    g_object_weak_ref(context_busy_watcher,
                      _context_busy_watcher_attach_integration_source_cb,
                      source);
}
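
/* Illustrative sketch only (not from the original source; the field names
 * priv->main_context, priv->dbus_context and priv->context_busy_watcher are
 * assumptions based on the comment above): with synchronous initialization,
 * keeping the internal context integrated would look roughly like
 *
 *     nm_context_busy_watcher_integrate_source(priv->main_context,
 *                                              priv->dbus_context,
 *                                              priv->context_busy_watcher);
 *
 * The weak ref registered above then destroys the integration source once
 * the busy watcher is gone, that is, once the last pending request completed. */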
/*****************************************************************************/
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
typedef struct {
    /* It is quite wasteful to require 2 pointers per property (of an instance) only to track whether
     * the property got changed. But it's convenient! */
    CList changed_prop_lst;

    GVariant *prop_data_value;
} NMLDBusObjPropData;

typedef struct {
    CList iface_lst;

    union {
        const NMLDBusMetaIface *meta;
        NMRefString            *name;
    } dbus_iface;

    CList changed_prop_lst_head;

    /* We also keep track of non-well-known interfaces. The presence of a D-Bus interface
     * is what makes a D-Bus object alive or not. As we should track all D-Bus objects, we also
     * need to track whether there are any interfaces on it -- even if we otherwise don't
     * care about the interface. */
    bool dbus_iface_is_wellknown : 1;

    /* if TRUE, the interface is about to be removed. */
    bool iface_removed : 1;

    bool nmobj_checked : 1;
    bool nmobj_compatible : 1;

    NMLDBusObjPropData prop_datas[];
} NMLDBusObjIfaceData;

/* The dbus_path must be the first element, so when we hash the object by the dbus_path,
 * we can also look up the object by only having a NMRefString at hand,
 * using nm_pdirect_hash()/nm_pdirect_equal(). */
G_STATIC_ASSERT(G_STRUCT_OFFSET(NMLDBusObject, dbus_path) == 0);
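
/* Illustrative sketch (not from the original source; the table name
 * "dbus_objects" is an assumption): with a GHashTable created as
 *
 *     g_hash_table_new(nm_pdirect_hash, nm_pdirect_equal)
 *
 * the assertion above allows looking up an object while only holding the
 * path as a NMRefString:
 *
 *     NMRefString   *path  = nm_ref_string_new("/org/freedesktop/NetworkManager/Devices/1");
 *     NMLDBusObject *dbobj = g_hash_table_lookup(dbus_objects, &path);
 *
 *     nm_ref_string_unref(path);
 *
 * This works because nm_pdirect_hash()/nm_pdirect_equal() dereference the
 * key pointer once, and dbus_path is at offset zero of NMLDBusObject. */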
typedef void (*NMLDBusObjWatchNotifyFcn)(NMClient *client, gpointer obj_watcher);

struct _NMLDBusObjWatcher {
    NMLDBusObject *dbobj;

    struct {
        CList                    watcher_lst;
        NMLDBusObjWatchNotifyFcn notify_fcn;
    } _priv;
};

typedef struct {
    NMLDBusObjWatcher parent;
    gpointer          user_data;
} NMLDBusObjWatcherWithPtr;

/*****************************************************************************/
NM_GOBJECT_PROPERTIES_DEFINE(NMClient,
                             PROP_DBUS_CONNECTION,
                             PROP_DBUS_NAME_OWNER,
                             PROP_VERSION,
                             PROP_INSTANCE_FLAGS,
                             PROP_STATE,
                             PROP_STARTUP,
                             PROP_NM_RUNNING,
                             PROP_NETWORKING_ENABLED,
                             PROP_WIRELESS_ENABLED,
                             PROP_WIRELESS_HARDWARE_ENABLED,
                             PROP_WWAN_ENABLED,
                             PROP_WWAN_HARDWARE_ENABLED,
                             PROP_WIMAX_ENABLED,
                             PROP_WIMAX_HARDWARE_ENABLED,
                             PROP_RADIO_FLAGS,
                             PROP_ACTIVE_CONNECTIONS,
                             PROP_CONNECTIVITY,
                             PROP_CONNECTIVITY_CHECK_URI,
                             PROP_CONNECTIVITY_CHECK_AVAILABLE,
                             PROP_CONNECTIVITY_CHECK_ENABLED,
                             PROP_PRIMARY_CONNECTION,
                             PROP_ACTIVATING_CONNECTION,
                             PROP_DEVICES,
                             PROP_ALL_DEVICES,
                             PROP_CONNECTIONS,
                             PROP_HOSTNAME,
                             PROP_CAN_MODIFY,
                             PROP_METERED,
                             PROP_DNS_MODE,
                             PROP_DNS_RC_MANAGER,
                             PROP_DNS_CONFIGURATION,
                             PROP_CHECKPOINTS,
core: add "VersionInfo" property on D-Bus and NMClient
This exposes NM_VERSION as number (contrary to the "Version", which is a
string). That is in particular useful, because the number can be
compared with <> due to the encoding of the version.
While at it, don't make it a single number. Expose an array of numbers,
where the following numbers are a bitfield of capabilities.
Note that before commit 3c67a1ec5e8e ('cli: remove version check against
NM'), we used to parse the "Version" string to detect the version. As
such, the information that "VersionInfo" exposes now, was already
(somewhat) available, you just had to parse the string. The main benefit of
"VersionInfo" is that it can expose capabilities (patched behavior) in
in a lightweight bitfield. To include the numerical version there is
just useful on top.
Currently no additional capabilities are exposed. The idea is of course
to have a place in the future, where we can expose additional
capabilities. Adding a capability flag is most useful for behavior that we
backport to older branches. Otherwise, we could just check the daemon version
alone. But since we only add "VersionInfo" property only now, we cannot backport
any capability further than this, because the "VersionInfo" property itself
won't be backported. As such, this will only be useful in the future by having
a place where we can add (and backport) capabilities.
Note that there is some overlap with the existing "Capability" property
and NMCapability enum. The difference is that adding a capability via "VersionInfo"
is only one bit, and thus cheaper. Most importantly, having it cheaper means
the downsides of adding a capability flag is significantly removed. In
practice, we could live without capabilities for a long time, so they
must be very cheap for them to be worth to add. Another difference might be,
that we will want that the VersionInfo is about compile time defaults (e.g.
a certain patch/behavior that is in or not), while NM_CAPABILITY_TEAM depends on
whether the team plugin is loaded at runtime.
                             PROP_VERSION_INFO,
                             PROP_CAPABILITIES,
                             PROP_PERMISSIONS_STATE, );
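
/* A rough sketch of what NM_GOBJECT_PROPERTIES_DEFINE() is expected to
 * expand to (simplified; the real macro lives in the glib-aux headers and
 * also emits notify helpers):
 *
 *     typedef enum {
 *         PROP_0,
 *         PROP_DBUS_CONNECTION,
 *         ...
 *         PROP_PERMISSIONS_STATE,
 *         _PROPERTY_ENUMS_LAST,
 *     } _PropertyEnums;
 *
 *     static GParamSpec *obj_properties[_PROPERTY_ENUMS_LAST] = {NULL};
 */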

enum {
    DEVICE_ADDED,
    DEVICE_REMOVED,
    ANY_DEVICE_ADDED,
    ANY_DEVICE_REMOVED,
    PERMISSION_CHANGED,
    CONNECTION_ADDED,
    CONNECTION_REMOVED,
    ACTIVE_CONNECTION_ADDED,
    ACTIVE_CONNECTION_REMOVED,

    LAST_SIGNAL
};

static guint signals[LAST_SIGNAL] = {0};
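
/* Illustrative sketch: the enum values index the "signals" array in the
 * usual GObject pattern. Registration (in class_init) and emission would
 * look roughly like this; the "device-added" details are simplified:
 *
 *     signals[DEVICE_ADDED] = g_signal_new(NM_CLIENT_DEVICE_ADDED,
 *                                          G_OBJECT_CLASS_TYPE(klass),
 *                                          G_SIGNAL_RUN_FIRST,
 *                                          0, NULL, NULL, NULL,
 *                                          G_TYPE_NONE, 1, G_TYPE_OBJECT);
 *
 *     g_signal_emit(client, signals[DEVICE_ADDED], 0, device);
 */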

enum {
    PROPERTY_O_IDX_NM_ACTIVATING_CONNECTION = 0,
    PROPERTY_O_IDX_NM_PRIMAY_CONNECTION,
    _PROPERTY_O_IDX_NM_NUM,
};

enum {
    PROPERTY_AO_IDX_DEVICES = 0,
    PROPERTY_AO_IDX_ALL_DEVICES,
    PROPERTY_AO_IDX_ACTIVE_CONNECTIONS,
    PROPERTY_AO_IDX_CHECKPOINTS,
    _PROPERTY_AO_IDX_NM_NUM,
};
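
/* Illustrative sketch (assumed field names): these index constants are
 * meant to address per-interface arrays of property state, so code can
 * reach a tracked object or object list by index rather than by name, e.g.:
 *
 *     NMLDBusPropertyAO *prop_ao = &priv->nm.property_ao[PROPERTY_AO_IDX_DEVICES];
 */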

typedef struct {
    struct udev *udev;

    GMainContext *main_context;
    GMainContext *dbus_context;
    GObject      *context_busy_watcher;
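
    /* NMClient sticks to the GMainContext of the thread that created it
     * ("main_context"). "dbus_context" may be a separate, internal context
     * used for D-Bus traffic, e.g. while a synchronous init is pending;
     * "context_busy_watcher" presumably exists so callers can tell, via
     * its lifetime, whether the contexts still have work queued. */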

    GDBusConnection *dbus_connection;

    NMLInitData *init_data;
    GHashTable  *dbus_objects;
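
    /* "dbus_objects" is assumed to be the cache keyed by
     * NMLDBusObject.dbus_path; see the nm_pdirect_hash()/nm_pdirect_equal()
     * note above. */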
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
CList obj_changed_lst_head;
|
2021-11-09 13:28:54 +01:00
|
|
|
GCancellable *name_owner_get_cancellable;
|
|
|
|
|
GCancellable *get_managed_objects_cancellable;
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
CList queue_notify_lst_head;
|
|
|
|
|
CList notify_event_lst_head;
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
CList dbus_objects_lst_head_watched_only;
|
|
|
|
|
CList dbus_objects_lst_head_on_dbus;
|
|
|
|
|
CList dbus_objects_lst_head_with_nmobj_not_ready;
|
|
|
|
|
CList dbus_objects_lst_head_with_nmobj_ready;
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
NMLDBusObject *dbobj_nm;
|
|
|
|
|
NMLDBusObject *dbobj_settings;
|
|
|
|
|
NMLDBusObject *dbobj_dns_manager;
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2020-07-15 12:39:50 +02:00
|
|
|
gsize log_call_counter;
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2021-11-09 13:28:54 +01:00
|
|
|
guint8 *permissions;
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously, GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first. (A sketch of the
environment-gated logging follows this list.)
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify:devices", and finally "device-added".
This changes the behavior of commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and from its "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
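The following is a minimal, self-contained sketch of the three-phase pattern
described above (hypothetical, simplified code, not the actual libnm
implementation): all changes are applied to the cache first, notifications are
only queued, and everything is emitted in one final commit step.
```
/* Sketch: apply changes first, emit signals last (compile with
 * `gcc demo.c $(pkg-config --cflags --libs glib-2.0)`). */
#include <glib.h>

static GQueue pending = G_QUEUE_INIT; /* property names still to announce */

/* Phases 1+2: record the new value in the cache, but do not emit yet. */
static void
handle_property_changed(GHashTable *cache, const char *name, GVariant *value)
{
    g_hash_table_replace(cache, g_strdup(name), g_variant_ref_sink(value));
    g_queue_push_tail(&pending, g_strdup(name));
}

/* Phase 3: emit all collected notifications, now that the cache is consistent. */
static void
commit_changes(void)
{
    char *name;

    while ((name = g_queue_pop_head(&pending))) {
        g_print("notify::%s\n", name); /* stand-in for g_object_notify() */
        g_free(name);
    }
}

int
main(void)
{
    GHashTable *cache = g_hash_table_new_full(g_str_hash, g_str_equal,
                                              g_free, (GDestroyNotify) g_variant_unref);

    /* Simulate one PropertiesChanged message carrying two properties. */
    handle_property_changed(cache, "Version", g_variant_new_string("1.30"));
    handle_property_changed(cache, "Connectivity", g_variant_new_uint32(4));

    /* Both cache entries are in place before the first signal fires. */
    commit_changes();

    g_hash_table_unref(cache);
    return 0;
}
```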
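Here is a sketch of the idea behind the type-checked meta data. The names
(MetaProperty, demarshal) are invented for illustration and are not the real
NMLDBusMetaIface API; the point is that each property statically records the
D-Bus type it expects, and demarshalling rejects anything else.
```
/* Sketch: static meta data that encodes the expected D-Bus type. */
#include <glib.h>

typedef struct {
    const char *dbus_name;
    const char *dbus_type; /* expected GVariant type string, e.g. "u" */
} MetaProperty;

/* Static and immutable, so it is never copied around. */
static const MetaProperty meta_props[] = {
    {"Version", "s"},
    {"Connectivity", "u"},
};

static gboolean
demarshal(const MetaProperty *meta, GVariant *value)
{
    /* Unlike a demarshaller that only knows the GObject property type,
     * this checks the wire type explicitly. */
    if (!g_variant_is_of_type(value, G_VARIANT_TYPE(meta->dbus_type))) {
        g_debug("property %s: expected D-Bus type \"%s\", got \"%s\"",
                meta->dbus_name, meta->dbus_type,
                g_variant_get_type_string(value));
        return FALSE;
    }
    return TRUE;
}

int
main(void)
{
    /* A server exposing "Connectivity" as a string gets rejected. */
    GVariant *bad = g_variant_ref_sink(g_variant_new_string("not-a-u"));

    g_assert(!demarshal(&meta_props[1], bad));
    g_variant_unref(bad);
    return 0;
}
```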
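And a sketch of the environment-gated logging, assuming a much-simplified
parsing of LIBNM_CLIENT_DEBUG (the real handling in libnm is more involved):
```
/* Sketch: escalate debug messages to warnings via an environment variable. */
#include <glib.h>
#include <string.h>

static gboolean
debug_flag_enabled(const char *flag)
{
    const char *env = g_getenv("LIBNM_CLIENT_DEBUG");

    return env && strstr(env, flag);
}

static void
client_log(const char *msg)
{
    if (debug_flag_enabled("warning")) {
        /* With G_DEBUG=fatal-warnings this aborts, like an assertion. */
        g_warning("libnm: %s", msg);
    } else if (debug_flag_enabled("trace")) {
        /* Only visible when G_MESSAGES_DEBUG enables debug output. */
        g_debug("libnm: %s", msg);
    }
    /* Otherwise: stay silent. */
}

int
main(void)
{
    client_log("server exposed an unexpected D-Bus type");
    return 0;
}
```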
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
GCancellable *permissions_cancellable;
char *name_owner;
guint name_owner_changed_id;
guint dbsid_nm_object_manager;
guint dbsid_dbus_properties_properties_changed;
guint dbsid_nm_settings_connection_updated;
guint dbsid_nm_connection_active_state_changed;
guint dbsid_nm_vpn_connection_state_changed;
guint dbsid_nm_check_permissions;
NMClientInstanceFlags instance_flags : 5;
NMTernary permissions_state : 3;
bool instance_flags_constructed : 1;
bool udev_inited : 1;
bool notify_event_lst_changed : 1;
bool check_dbobj_visible_all : 1;
bool nm_running : 1;
struct {
    NMLDBusPropertyO  property_o[_PROPERTY_O_IDX_NM_NUM];
    NMLDBusPropertyAO property_ao[_PROPERTY_AO_IDX_NM_NUM];
    char             *connectivity_check_uri;
    char             *version;
    guint32          *capabilities_arr;
core: add "VersionInfo" property on D-Bus and NMClient
This exposes NM_VERSION as a number (contrary to "Version", which is a
string). That is particularly useful because the number can be
compared with <> due to the encoding of the version.
While at it, don't make it a single number. Expose an array of numbers,
where the following numbers are a bitfield of capabilities.
Note that before commit 3c67a1ec5e8e ('cli: remove version check against
NM'), we used to parse the "Version" string to detect the version. As
such, the information that "VersionInfo" exposes now was already
(somewhat) available, you just had to parse the string. The main benefit of
"VersionInfo" is that it can expose capabilities (patched behavior) in
a lightweight bitfield. Including the numerical version there is
just useful on top.
Currently no additional capabilities are exposed. The idea is of course
to have a place in the future where we can expose additional
capabilities. Adding a capability flag is most useful for behavior that we
backport to older branches. Otherwise, we could just check the daemon version
alone. But since we add the "VersionInfo" property only now, we cannot backport
any capability further back than this, because the "VersionInfo" property itself
won't be backported. As such, this will only be useful in the future, by having
a place where we can add (and backport) capabilities.
Note that there is some overlap with the existing "Capabilities" property
and the NMCapability enum. The difference is that adding a capability via "VersionInfo"
costs only one bit and is thus cheaper. Most importantly, being cheaper means
the downsides of adding a capability flag are significantly reduced. In
practice, we could live without capabilities for a long time, so they
must be very cheap to be worth adding. Another difference might be
that we want "VersionInfo" to be about compile-time defaults (e.g.
whether a certain patch/behavior is in or not), while NM_CAPABILITY_TEAM depends on
whether the team plugin is loaded at runtime.
2022-12-13 12:25:04 +01:00
|
|
|
guint32 *version_info_arr;
|
2019-12-20 18:10:55 +01:00
|
|
|
gsize capabilities_len;
|
core: add "VersionInfo" property on D-Bus and NMClient
This exposes NM_VERSION as number (contrary to the "Version", which is a
string). That is in particular useful, because the number can be
compared with <> due to the encoding of the version.
While at it, don't make it a single number. Expose an array of numbers,
where the following numbers are a bitfield of capabilities.
Note that before commit 3c67a1ec5e8e ('cli: remove version check against
NM'), we used to parse the "Version" string to detect the version. As
such, the information that "VersionInfo" exposes now, was already
(somewhat) available, you just had to parse the string. The main benefit of
"VersionInfo" is that it can expose capabilities (patched behavior) in
in a lightweight bitfield. To include the numerical version there is
just useful on top.
Currently no additional capabilities are exposed. The idea is of course
to have a place in the future, where we can expose additional
capabilities. Adding a capability flag is most useful for behavior that we
backport to older branches. Otherwise, we could just check the daemon version
alone. But since we only add "VersionInfo" property only now, we cannot backport
any capability further than this, because the "VersionInfo" property itself
won't be backported. As such, this will only be useful in the future by having
a place where we can add (and backport) capabilities.
Note that there is some overlap with the existing "Capability" property
and NMCapability enum. The difference is that adding a capability via "VersionInfo"
is only one bit, and thus cheaper. Most importantly, having it cheaper means
the downsides of adding a capability flag is significantly removed. In
practice, we could live without capabilities for a long time, so they
must be very cheap for them to be worth to add. Another difference might be,
that we will want that the VersionInfo is about compile time defaults (e.g.
a certain patch/behavior that is in or not), while NM_CAPABILITY_TEAM depends on
whether the team plugin is loaded at runtime.
2022-12-13 12:25:04 +01:00
        gsize version_info_len;
        guint32 connectivity;
        guint32 state;
        guint32 metered;
        guint32 radio_flags;
        bool connectivity_check_available;
        bool connectivity_check_enabled;
        bool networking_enabled;
        bool startup;
        bool wireless_enabled;
        bool wireless_hardware_enabled;
        bool wwan_enabled;
        bool wwan_hardware_enabled;
    } nm;
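These cached manager fields plausibly back the public NMClient getters. A
minimal sketch reading a few of them through the standard synchronous
constructor follows; it assumes a running NetworkManager service and the
usual `pkg-config --cflags --libs libnm` build flags.

/* Sketch: reading cached manager properties via the public libnm API. */
#include <NetworkManager.h>

int
main(void)
{
    GError   *error  = NULL;
    NMClient *client = nm_client_new(NULL, &error);

    if (!client) {
        g_printerr("could not create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    g_print("version:            %s\n", nm_client_get_version(client));
    g_print("networking enabled: %d\n", nm_client_networking_get_enabled(client));
    g_print("wireless enabled:   %d\n", nm_client_wireless_get_enabled(client));
    g_print("still starting up:  %d\n", nm_client_get_startup(client));

    g_object_unref(client);
    return 0;
}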
    struct {
        NMLDBusPropertyAO connections;

        char *hostname;
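The "connections" member caches the Settings service's list of connection
profiles. A hedged sketch of enumerating it through the public API (again
assuming a running daemon):

/* Sketch: listing cached connection profiles via the public API.
 * nm_client_get_connections() returns the profiles visible to the caller;
 * profiles hidden by "connection.permissions" are filtered out. */
#include <NetworkManager.h>

int
main(void)
{
    GError          *error  = NULL;
    NMClient        *client = nm_client_new(NULL, &error);
    const GPtrArray *connections;
    guint            i;

    if (!client) {
        g_printerr("could not create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    connections = nm_client_get_connections(client);
    for (i = 0; i < connections->len; i++) {
        NMRemoteConnection *remote = connections->pdata[i];

        g_print("%s (%s)\n",
                nm_connection_get_id(NM_CONNECTION(remote)),
                nm_connection_get_uuid(NM_CONNECTION(remote)));
    }

    g_object_unref(client);
    return 0;
}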
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
        bool can_modify;
    } settings;
    struct {
        GPtrArray *configuration;
        char      *mode;
        char      *rc_manager;
    } dns_manager;
} NMClientPrivate;

struct _NMClient {
    union {
        GObject      parent;
        NMObjectBase obj_base;
    };
    NMClientPrivate _priv;
};

struct _NMClientClass {
    union {
        GObjectClass      parent;
        NMObjectBaseClass obj_base;
    };
};

static void nm_client_initable_iface_init(GInitableIface *iface);
static void nm_client_async_initable_iface_init(GAsyncInitableIface *iface);

G_DEFINE_TYPE_WITH_CODE(NMClient,
                        nm_client,
                        G_TYPE_OBJECT,
                        G_IMPLEMENT_INTERFACE(G_TYPE_INITABLE, nm_client_initable_iface_init);
                        G_IMPLEMENT_INTERFACE(G_TYPE_ASYNC_INITABLE,
                                              nm_client_async_initable_iface_init);)

#define NM_CLIENT_GET_PRIVATE(self) _NM_GET_PRIVATE(self, NMClient, NM_IS_CLIENT)

/*****************************************************************************/

static void _init_start_check_complete(NMClient *self);

static void name_owner_changed_cb(GDBusConnection *connection,
                                  const char      *sender_name,
                                  const char      *object_path,
                                  const char      *interface_name,
                                  const char      *signal_name,
                                  GVariant        *parameters,
                                  gpointer         user_data);
libnm: fix leaking internal GMainContext for synchronously initialized NMClient
NMClient makes asynchronous D-Bus calls via g_dbus_connection_call().
This references the current GMainContext to later invoke the
asynchronous callback. Even when we cancel the asynchronous call,
the callback will still be invoked (later) to complete the request.
In particular, this means that when we destroy (unref) an NMClient, there
are quite possibly still pending requests in the GMainContext. Although
they are cancelled, they keep the GMainContext alive.
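As a hedged sketch of that GIO behavior (the object path and method below
are only illustrative; this snippet is not part of the patch): cancelling
does not unschedule the callback, it only makes the call complete with
G_IO_ERROR_CANCELLED:
```
#include <gio/gio.h>

static void
got_reply(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError   *error = NULL;
    GVariant *ret;

    /* GIO invokes this callback exactly once, even for cancelled calls;
     * until it ran, the pending operation keeps a reference on the
     * GMainContext that was thread-default when the call was issued. */
    ret = g_dbus_connection_call_finish(G_DBUS_CONNECTION(source), result, &error);
    if (!ret) {
        g_print("call completed with error: %s\n", error->message);
        g_error_free(error);
        return;
    }
    g_variant_unref(ret);
}

static void
call_and_cancel(GDBusConnection *connection, GCancellable *cancellable)
{
    g_dbus_connection_call(connection,
                           "org.freedesktop.NetworkManager",
                           "/org/freedesktop/NetworkManager",
                           "org.freedesktop.DBus.Properties",
                           "Get",
                           g_variant_new("(ss)", "org.freedesktop.NetworkManager", "Version"),
                           G_VARIANT_TYPE("(v)"),
                           G_DBUS_CALL_FLAGS_NONE,
                           -1,
                           cancellable,
                           got_reply,
                           NULL);

    /* Cancelling now does not drop the callback: got_reply() still runs
     * later, from the context that was current above. */
    g_cancellable_cancel(cancellable);
}
```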
With synchronous initialization, we have an internal GMainContext.
When we destroy the NMClient, we cannot just unhook the integrated
source; instead, we need to keep it integrated in the caller's main
context as long as there are pending requests.
Add a mechanism to track those pending requests and fix the leak for the
internal GMainContext. Also expose the same mechanism to the user via a new
API called nm_client_get_context_busy_watcher(). This allows the user
to know when it can stop iterating the main context and when all
resources are reclaimed.
For example, the following will lead to a crash:
    for i in range(1, 2000):
        nmc = NM.Client.new(None)
This creates a number of NMClient instances and destroys them again.
Note that here the GMainContext is never iterated, because
synchronous initialization does not iterate the caller's context. So,
while we correctly unref and dispose the created NMClient instances,
there are pending requests left in the inner GMainContext. These pile
up and soon the program will crash because it runs out of file descriptors.
We can have a similar problem with asynchronous initialization, when
we create a new GMainContext per client, and don't iterate it after
we are done with the client.
Note that this patch does not avoid the problem in general. The problem
cannot be avoided: the user must iterate the main context at some point.
Otherwise, resources (memory and file descriptors) will be leaked.
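A minimal sketch of draining via the new API (assuming the weak-ref
pattern on the returned watcher object; the helper names and the explicit
context argument are invented for this example):
```
#include <NetworkManager.h>

static void
watcher_gone(gpointer user_data, GObject *where_the_object_was)
{
    *((gboolean *) user_data) = TRUE;
}

/* Drop the last reference to @client, then keep iterating the context it
 * uses until the context-busy watcher is destroyed, i.e. until all
 * pending requests have completed and their resources are reclaimed. */
static void
unref_client_and_drain(NMClient *client, GMainContext *context)
{
    GObject *watcher = nm_client_get_context_busy_watcher(client);
    gboolean gone    = FALSE;

    g_object_weak_ref(watcher, watcher_gone, &gone);
    g_object_unref(client);

    while (!gone)
        g_main_context_iteration(context, TRUE);
}
```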
Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/merge_requests/347
static void name_owner_get_call(NMClient *self);
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should all be for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping the NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also, before, we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types match. It's pretty hard
to misuse them, because a mismatch won't compile. A generic illustration of
the idea follows.
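A generic illustration of such a compile-time check; this is not the actual macro from this patch, EXAMPLE_ASSERT_FIELD_IS_U32 and ExampleProperties are made-up names, and the real checks are stricter:
```
#include <glib.h>

typedef struct {
    guint32 version_id;
} ExampleProperties;

/* Hypothetical macro: reject, at compile time, a field whose size does
 * not fit the D-Bus "u" (guint32) type. */
#define EXAMPLE_ASSERT_FIELD_IS_U32(structtype, field) \
    G_STATIC_ASSERT(sizeof(((structtype *) NULL)->field) == sizeof(guint32))

EXAMPLE_ASSERT_FIELD_IS_U32(ExampleProperties, version_id);
```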
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type; it didn't know the expected D-Bus type. The runtime side of such a
check is sketched below.
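In GVariant terms, the runtime side of such a check can look like this sketch (example_demarshal_u32 is an assumed name, not the patch's code):
```
/* Only accept a property value that has the expected D-Bus type;
 * otherwise reject the bogus value instead of misbehaving later. */
static gboolean
example_demarshal_u32(GVariant *value, guint32 *out_val)
{
    if (!g_variant_is_of_type(value, G_VARIANT_TYPE_UINT32))
        return FALSE;

    *out_val = g_variant_get_uint32(value);
    return TRUE;
}
```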
- Previously, GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first. A sketch of arming these
settings follows.
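For illustration, a test program might arm these settings like this (a sketch; exporting the variables in the shell before running works the same way):
```
#include <glib.h>

int
main(int argc, char **argv)
{
    /* Must happen before libnm (and GLib logging) is first used. */
    g_setenv("LIBNM_CLIENT_DEBUG", "warning,error", TRUE);
    g_setenv("G_DEBUG", "fatal-warnings", TRUE);

    /* ... create the NMClient and exercise it ... */
    return 0;
}
```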
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me; a sketch of observing it follows.
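A hedged sketch of observing that order with the existing NMClient signals (device_added_cb, device_removed_cb and watch_devices are illustrative names):
```
static void
device_added_cb(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("added: %s\n", nm_device_get_iface(device));
}

static void
device_removed_cb(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("removed: %s\n", nm_device_get_iface(device));
}

static void
watch_devices(NMClient *client)
{
    /* For one batch of changes the handlers now run as "device-removed",
     * then "notify::devices", then "device-added"; do not rely on it. */
    g_signal_connect(client, "device-added", G_CALLBACK(device_added_cb), NULL);
    g_signal_connect(client, "device-removed", G_CALLBACK(device_removed_cb), NULL);
}
```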
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden by NMClient's
nm_client_get_connections() and its "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved; a listing
sketch follows.
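A sketch of the visible-profiles behavior with public API (print_visible_connections is an assumed helper name):
```
/* Lists only the profiles visible to the user; profiles hidden via
 * "connection.permissions" are filtered out of this array, yet may
 * still surface via nm_active_connection_get_connection(). */
static void
print_visible_connections(NMClient *client)
{
    const GPtrArray *connections = nm_client_get_connections(client);
    guint            i;

    for (i = 0; i < connections->len; i++) {
        NMConnection *connection = NM_CONNECTION(connections->pdata[i]);

        g_print("%s\n", nm_connection_get_id(connection));
    }
}
```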
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long a plain `nmcli` invocation takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
static void _set_nm_running(NMClient *self, gboolean queue_notify);
/*****************************************************************************/
static NMRefString *_dbus_path_nm = NULL;
static NMRefString *_dbus_path_settings = NULL;
static NMRefString *_dbus_path_dns_manager = NULL;

/*****************************************************************************/

static NM_UTILS_LOOKUP_STR_DEFINE(
    nml_dbus_obj_state_to_string,
    NMLDBusObjState,
    NM_UTILS_LOOKUP_DEFAULT_WARN("???"),
    NM_UTILS_LOOKUP_ITEM(NML_DBUS_OBJ_STATE_UNLINKED, "unlinked"),
    NM_UTILS_LOOKUP_ITEM(NML_DBUS_OBJ_STATE_WATCHED_ONLY, "watched-only"),
    NM_UTILS_LOOKUP_ITEM(NML_DBUS_OBJ_STATE_ON_DBUS, "on-dbus"),
    NM_UTILS_LOOKUP_ITEM(NML_DBUS_OBJ_STATE_WITH_NMOBJ_NOT_READY, "not-ready"),
    NM_UTILS_LOOKUP_ITEM(NML_DBUS_OBJ_STATE_WITH_NMOBJ_READY, "ready"), );

/*****************************************************************************/

/**
 * nm_client_error_quark:
 *
 * Registers an error quark for #NMClient if necessary.
 *
 * Returns: the error quark used for #NMClient errors.
 **/
NM_CACHED_QUARK_FCN("nm-client-error-quark", nm_client_error_quark);

/*****************************************************************************/

NMLInitData *
nml_init_data_new_sync(GCancellable *cancellable, GMainLoop *main_loop, GError **error_location)
|
{
    NMLInitData *init_data;

    init_data = g_slice_new(NMLInitData);
    *init_data = (NMLInitData) {
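        /* ".is_sync" selects which arm of ".data" is meaningful: the sync arm
         * stores the caller's main loop and error location, while (judging by
         * nml_init_data_new_async() below) the async arm would hold the GTask
         * instead. */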
        .cancellable = nm_g_object_ref(cancellable),
        .is_sync = TRUE,
        .data.sync =
            {
                .main_loop = main_loop,
                .error_location = error_location,
            },
    };
    return init_data;
}
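
/* A minimal, illustrative sketch of how the sync variant might be fed (kept
 * compiled out; "internal_context" and "cancellable" stand in for whatever
 * the surrounding initialization code actually provides, and whether the
 * init data takes ownership of the loop is an assumption not shown here):
 */
#if 0
static NMLInitData *
_example_new_sync_init_data(GMainContext *internal_context,
                            GCancellable *cancellable,
                            GError      **error)
{
    GMainLoop *main_loop;

    /* Create the loop on the internal context that drives initialization;
     * the init machinery is expected to quit it once a result is known. */
    main_loop = g_main_loop_new(internal_context, FALSE);
    return nml_init_data_new_sync(cancellable, main_loop, error);
}
#endif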
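
/* Asynchronous counterpart: instead of a GMainLoop and a GError** it takes a
 * GTask. The "_take" suffix on the parameter suggests (naming convention,
 * not verified here) that ownership of the task passes to the init data. */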
NMLInitData *
nml_init_data_new_async(GCancellable *cancellable, GTask *task_take)
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
{
    NMLInitData *init_data;

    init_data  = g_slice_new(NMLInitData);
    *init_data = (NMLInitData) {
        .cancellable = nm_g_object_ref(cancellable),
        .is_sync     = FALSE,
        .data.async =
            {
                .task = g_steal_pointer(&task_take),
            },
    };
    return init_data;
}

void
nml_init_data_return(NMLInitData *init_data, GError *error_take)
{
    nm_assert(init_data);

    nm_clear_pointer(&init_data->cancel_on_idle_source, nm_g_source_destroy_and_unref);
    nm_clear_g_signal_handler(init_data->cancellable, &init_data->cancelled_id);

    if (init_data->is_sync) {
        /* Synchronous initialization: propagate the error to the caller's
         * out-location and quit the main loop the caller is iterating. */
        if (error_take)
            g_propagate_error(init_data->data.sync.error_location, error_take);
        g_main_loop_quit(init_data->data.sync.main_loop);
    } else {
        /* Asynchronous initialization: complete the GTask. */
        if (error_take)
            g_task_return_error(init_data->data.async.task, error_take);
        else
            g_task_return_boolean(init_data->data.async.task, TRUE);
        g_object_unref(init_data->data.async.task);
    }
    nm_g_object_unref(init_data->cancellable);
    nm_g_slice_free(init_data);
}

/*****************************************************************************/

GError *
_nm_client_new_error_nm_not_running(void)
{
    return g_error_new_literal(NM_CLIENT_ERROR,
                               NM_CLIENT_ERROR_MANAGER_NOT_RUNNING,
                               "NetworkManager is not running");
}

GError *
_nm_client_new_error_nm_not_cached(void)
{
    return g_error_new_literal(NM_CLIENT_ERROR,
                               NM_CLIENT_ERROR_FAILED,
                               "Object is no longer in the client cache");
}
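(Both helpers return a newly allocated GError, so a caller can presumably hand it
straight to a pending operation, for example
g_task_return_error(task, _nm_client_new_error_nm_not_running()). That usage is
illustrative, not a line from this file.)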
libnm: fix leaking internal GMainContext for synchronously initialized NMClient
NMClient makes asynchronous D-Bus calls via g_dbus_connection_call().
This references the current GMainContext to later invoke the
asynchronous callback. Even when we cancel the asynchronous call,
the callback will still be invoked (later) to complete the request.
In particular this means when we destroy (unref) an NMClient, there
are quite possibly pending requests in the GMainContext. Although they
are cancelled, they keep the GMainContext alive.
With synchronous initialization, we have an internal GMainContext.
When we destroy the NMClient, we cannot just unhook the integrated
source, instead, we need to keep it integrated in the caller's main
context, as long as there are pending requests.
Add a mechanism to track those pending requests and fix the leak for the
internal GMainContext. Also expose the same mechanism to the user via a new
API called nm_client_get_context_busy_watcher(). This allows the user
to know when it can stop iterating the main context and when all
resources are reclaimed.
For example, the following will lead to a crash:
for i in range(1,2000):
nmc = NM.Client.new(None)
This creates a number of NMClient instances and destroys them again.
Note that here the GMainContext is never iterated, because
synchronous initialization does not iterate the caller's context. So,
while we correctly unref and dispose the created NMClient instances,
there are pending requests left in the inner GMainContext. These pile
up and soon the program will crash because it runs out of file descriptors.
We can have a similar problem with asynchronous initialization, when
we create a new GMainContext per client, and don't iterate it after
we are done with the client.
Note that this patch does not avoid the problem in general. The problem
cannot be avoided; the user must iterate the main context at some point.
Otherwise, resources (memory and file descriptors) will be leaked.
Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/merge_requests/347
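As a concrete illustration of the busy-watcher mechanism described above, here is
a minimal sketch of how a caller could tear down a client and keep iterating its
context until all pending requests are reclaimed. It assumes only the
nm_client_get_main_context() and nm_client_get_context_busy_watcher() getters
mentioned above; the other names are illustrative:
```
#include <NetworkManager.h>

static void
on_watcher_gone(gpointer data, GObject *where_the_object_was)
{
    /* The busy watcher was finalized: no pending requests remain. */
    *((gboolean *) data) = TRUE;
}

static void
drain_and_destroy_client(NMClient *client)
{
    GMainContext *context = g_main_context_ref(nm_client_get_main_context(client));
    GObject      *watcher = nm_client_get_context_busy_watcher(client);
    gboolean      gone    = FALSE;

    g_object_weak_ref(watcher, on_watcher_gone, &gone);
    g_object_unref(client);

    /* Cancelled/pending D-Bus calls still hold the watcher alive; iterate
     * the context until their callbacks have run and it is finalized. */
    while (!gone)
        g_main_context_iteration(context, TRUE);
    g_main_context_unref(context);
}
```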

static void
_nm_client_dbus_call_simple_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GAsyncReadyCallback callback;
    gpointer            callback_user_data;
    gs_unref_object GObject *context_busy_watcher = NULL;
    gpointer                 obfuscated_self_ptr;
    gpointer                 log_call_counter_ptr;

    /* Recover the values that were packed into user_data when the call was issued. */
    nm_utils_user_data_unpack(user_data,
                              &callback,
                              &callback_user_data,
                              &context_busy_watcher,
                              &obfuscated_self_ptr,
                              &log_call_counter_ptr);

|
|
|
|
2020-07-15 12:39:50 +02:00
|
|
|
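/* Trace-level log entry; such messages can be enabled at runtime with
 * LIBNM_CLIENT_DEBUG=trace. */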
NML_DBUS_LOG(_NML_NMCLIENT_LOG_LEVEL_COERCE(NML_DBUS_LOG_LEVEL_TRACE),
|
|
|
|
|
"nmclient[" NM_HASH_OBFUSCATE_PTR_FMT "]: call[%" G_GSIZE_FORMAT "] completed",
|
|
|
|
|
(guint64) GPOINTER_TO_SIZE(obfuscated_self_ptr),
|
|
|
|
|
GPOINTER_TO_SIZE(log_call_counter_ptr));
|
2019-11-26 00:23:41 +01:00
|
|
|
|
2020-10-22 15:07:36 +02:00
|
|
|
callback(source, result, callback_user_data);
|
2019-11-26 00:23:41 +01:00
|
|
|
}
|
|
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectManagerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for themselves. Note
that it is still fairly complex.
- There was a lingering performance issue with a large number of D-Bus
objects. The patch tries hard to make the implementation scale well. Of
course, when we cache N objects that have N-to-M references to each other,
we are still fundamentally O(N*M) in runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to an NMClient happens through the caller's
GMainContext. This follows glib's style and effectively allows using NMClient
in a multi-threaded scenario (see the sketch after this list). This implies
sticking to a main context upon construction and ensuring that callbacks are
only invoked when iterating that context. Also, NMClient itself shall never
iterate the caller's context. This also means libnm must never use
g_idle_add() or g_timeout_add(), as those enqueue sources in the
g_main_context_default() context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receives a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should all be for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Previously we also constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types match. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type; it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
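To illustrate these GMainContext rules, here is a minimal sketch (only public
libnm/GLib API; not code from this patch) of driving an NMClient from a worker
thread with its own context:
```
#include <NetworkManager.h>

/* A worker thread creates its own GMainContext and makes it
 * thread-default before creating the NMClient. The client then sticks
 * to this context, and its callbacks only fire while this thread
 * iterates the context. */
static gpointer
nm_worker_thread(gpointer user_data)
{
    GMainContext *context = g_main_context_new();
    NMClient     *client;
    GError       *error = NULL;

    g_main_context_push_thread_default(context);

    client = nm_client_new(NULL, &error);
    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_clear_error(&error);
    } else {
        /* ... use the client; iterate @context to receive signals ... */
        while (g_main_context_iteration(context, FALSE))
            ;
        g_object_unref(client);
    }

    g_main_context_pop_thread_default(context);
    g_main_context_unref(context);
    return NULL;
}
```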
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
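/* Issues an asynchronous D-Bus call on behalf of @self. The packed user data
 * carries the context-busy-watcher, which keeps the client's main context
 * referenced until the reply is dispatched (see the completion handler
 * above). */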
void
|
2021-11-09 13:28:54 +01:00
|
|
|
_nm_client_dbus_call_simple(NMClient *self,
|
|
|
|
|
GCancellable *cancellable,
|
|
|
|
|
const char *object_path,
|
|
|
|
|
const char *interface_name,
|
|
|
|
|
const char *method_name,
|
|
|
|
|
GVariant *parameters,
|
2019-10-30 11:42:58 +01:00
|
|
|
const GVariantType *reply_type,
|
|
|
|
|
GDBusCallFlags flags,
|
|
|
|
|
int timeout_msec,
|
|
|
|
|
GAsyncReadyCallback callback,
|
|
|
|
|
gpointer user_data)
|
|
|
|
|
{
|
2021-11-09 13:28:54 +01:00
|
|
|
NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
|
2019-10-30 11:42:58 +01:00
|
|
|
nm_auto_pop_gmaincontext GMainContext *dbus_context = NULL;
|
2021-11-09 13:28:54 +01:00
|
|
|
gs_free char *log_str = NULL;
|
2020-07-15 12:39:50 +02:00
|
|
|
gsize log_call_counter;
|
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them, because otherwise it won't compile.
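A simplified, hypothetical analogue of such a compile-time check (not the actual NML_DBUS_META_PROPERTY_INIT_U() definition; the MetaProperty struct and macro name are invented for illustration):
```
#include <glib.h>

/* Hypothetical analogue of a type-checked initializer macro: the
 * initializer only compiles when the struct member really is a
 * guint32, via the classic negative-array-size trick. */
typedef struct {
    const char *dbus_type;
    gsize       offset;
} MetaProperty;

#define META_PROPERTY_INIT_U(type, member)                            \
    {                                                                 \
        .dbus_type = "u",                                             \
        .offset    = G_STRUCT_OFFSET(type, member)                    \
                   + 0 * sizeof(char[sizeof(((type *) NULL)->member)  \
                                     == sizeof(guint32) ? 1 : -1]),   \
    }

typedef struct {
    guint32 flags;
    char   *name;
} ExampleProps;

/* Compiles: */
static const MetaProperty meta_flags = META_PROPERTY_INIT_U(ExampleProps, flags);
/* Would not compile: META_PROPERTY_INIT_U(ExampleProps, name) */
```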
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
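A hedged sketch of that kind of strict type check (plain GLib, not libnm's actual demarshalling code; the helper function is illustrative):
```
#include <glib.h>

/* Reject a D-Bus property value whose type does not match the
 * expected one, instead of blindly converting it to the GObject
 * property type. */
static gboolean
accept_uint32_property(GVariant *value, guint32 *out_val)
{
    if (!g_variant_is_of_type(value, G_VARIANT_TYPE_UINT32))
        return FALSE; /* the server exposed an unexpected type; ignore it */

    *out_val = g_variant_get_uint32(value);
    return TRUE;
}
```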
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Hence these are not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
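For example, a CI job could arm both settings like this (the test binary name is a placeholder; the environment variables are the ones described above):
```
# Turn libnm client-side warnings into fatal assertions:
export LIBNM_CLIENT_DEBUG=warning,error
export G_DEBUG=fatal-warnings
./run-libnm-tests   # hypothetical test runner
```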
- Note that this changes the order in which we emit "notify::devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify::devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
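For consumers, the order can be observed with ordinary signal handlers; a hedged sketch (the NMClient signal and property names are real, the handlers are illustrative):
```
#include <NetworkManager.h>

/* Hedged sketch: observe the signal ordering described above. A
 * device change arrives as "device-removed", then "notify::devices",
 * then "device-added". */
static void
on_device_removed(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("device-removed: %s\n", nm_device_get_iface(device));
}

static void
on_devices_notify(GObject *client, GParamSpec *pspec, gpointer user_data)
{
    g_print("notify::devices\n");
}

static void
on_device_added(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("device-added: %s\n", nm_device_get_iface(device));
}

static void
watch_device_signals(NMClient *client)
{
    g_signal_connect(client, "device-removed", G_CALLBACK(on_device_removed), NULL);
    g_signal_connect(client, "notify::devices", G_CALLBACK(on_devices_notify), NULL);
    g_signal_connect(client, "device-added", G_CALLBACK(on_device_added), NULL);
}
```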
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and from the "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
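A hedged sketch of the second path (nm_active_connection_get_connection() and nm_connection_get_id() are real API; the helper function is illustrative):
```
#include <NetworkManager.h>

/* Profiles hidden via "connection.permissions" do not show up in
 * nm_client_get_connections(), but may still be reachable through an
 * active connection. */
static void
show_active_profile(NMActiveConnection *ac)
{
    NMRemoteConnection *remote = nm_active_connection_get_connection(ac);

    if (remote)
        g_print("active profile: %s\n",
                nm_connection_get_id(NM_CONNECTION(remote)));
}
```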
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    nm_assert(priv->name_owner);
    nm_assert(!cancellable || G_IS_CANCELLABLE(cancellable));
    nm_assert(callback);
    nm_assert(object_path);
    nm_assert(interface_name);
    nm_assert(method_name);
    nm_assert(reply_type);
    dbus_context = nm_g_main_context_push_thread_default_if_necessary(priv->dbus_context);

    log_call_counter = ++priv->log_call_counter;

    NML_NMCLIENT_LOG_T(self,
                       "call[%" G_GSIZE_FORMAT "] D-Bus method on %s: %s, %s.%s -> %s (%s)",
                       log_call_counter,
                       priv->name_owner,
                       object_path,
                       interface_name,
                       method_name,
                       (const char *) reply_type ?: "???",
                       parameters ? (log_str = g_variant_print(parameters, TRUE)) : "NULL");
    g_dbus_connection_call(priv->dbus_connection,
                           priv->name_owner,
                           object_path,
                           interface_name,
                           method_name,
                           parameters,
                           reply_type,
                           flags,
                           timeout_msec,
                           cancellable,
                           _nm_client_dbus_call_simple_cb,
                           nm_utils_user_data_pack(callback,
                                                   user_data,
                                                   g_object_ref(priv->context_busy_watcher),
                                                   GSIZE_TO_POINTER(NM_HASH_OBFUSCATE_PTR(self)),
                                                   GSIZE_TO_POINTER(log_call_counter)));
}
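
/* A descriptive note added for readability; it is inferred from the
 * parameter list below (the function body is not part of this excerpt).
 * _nm_client_dbus_call() appears to be libnm's internal wrapper for
 * issuing a D-Bus method call (@object_path, @interface_name,
 * @method_name, @parameters) on NMClient's GDBusConnection, presumably
 * via g_dbus_connection_call(). @source_obj and @source_tag identify the
 * originating async operation, @reply_type/@flags/@timeout_msec constrain
 * the call, and completion handling is split between @internal_callback
 * (libnm's own post-processing) and the caller-supplied
 * @user_callback/@user_callback_data. */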
void
_nm_client_dbus_call(NMClient *self,
                     gpointer source_obj,
                     gpointer source_tag,
                     GCancellable *cancellable,
                     GAsyncReadyCallback user_callback,
                     gpointer user_callback_data,
                     const char *object_path,
                     const char *interface_name,
                     const char *method_name,
                     GVariant *parameters,
                     const GVariantType *reply_type,
                     GDBusCallFlags flags,
                     int timeout_msec,
                     GAsyncReadyCallback internal_callback)
{
    NMClientPrivate *priv;
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
gs_unref_object GTask *task = NULL;
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
nm_assert(!source_obj || G_IS_OBJECT(source_obj));
|
|
|
|
|
nm_assert(source_tag);
|
|
|
|
|
nm_assert(!cancellable || G_IS_CANCELLABLE(cancellable));
|
|
|
|
|
nm_assert(internal_callback);
|
|
|
|
|
nm_assert(object_path);
|
|
|
|
|
nm_assert(interface_name);
|
|
|
|
|
nm_assert(method_name);
|
|
|
|
|
nm_assert(reply_type);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
task = nm_g_task_new(source_obj, cancellable, source_tag, user_callback, user_callback_data);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
if (!self) {
|
2022-07-04 13:46:39 +02:00
|
|
|
if (parameters)
|
|
|
|
|
nm_g_variant_unref_floating(parameters);
|
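The code fragments interleaved with the blame annotations above (the
gs_unref_object GTask declaration, the nm_assert() preamble, the
nm_g_task_new() call, and the floating-variant cleanup) appear to belong to
NMClient's internal wrapper around g_dbus_connection_call(). The following is
a rough sketch of that pattern in plain GLib terms; the function name, the
hardcoded bus name, the timeout and the error path are assumptions here, and
only the asserted preconditions and the task/variant handling come from the
fragments. In libnm, gs_unref_object, nm_assert() and nm_g_task_new() play
the roles of g_autoptr(), g_assert() and g_task_new() +
g_task_set_source_tag() below:
```
#include <gio/gio.h>

/* Hedged reconstruction of the call-site pattern; "demo_dbus_call" and
 * its error handling are hypothetical. */
static void
demo_dbus_call(GDBusConnection    *dbus_connection, /* may be NULL if not connected */
               gpointer            source_obj,
               gpointer            source_tag,
               GCancellable       *cancellable,
               const char         *object_path,
               const char         *interface_name,
               const char         *method_name,
               GVariant           *parameters, /* floating reference, consumed */
               const GVariantType *reply_type,
               GAsyncReadyCallback internal_callback,
               GAsyncReadyCallback user_callback,
               gpointer            user_callback_data)
{
    g_autoptr(GTask) task = NULL;

    /* Precondition checks, matching the nm_assert() preamble above. */
    g_assert(!source_obj || G_IS_OBJECT(source_obj));
    g_assert(source_tag);
    g_assert(!cancellable || G_IS_CANCELLABLE(cancellable));
    g_assert(internal_callback);
    g_assert(object_path && interface_name && method_name && reply_type);

    /* nm_g_task_new() bundles these two steps in libnm. */
    task = g_task_new(source_obj, cancellable, user_callback, user_callback_data);
    g_task_set_source_tag(task, source_tag);

    if (!dbus_connection) {
        /* Not connected: consume the floating parameters and fail the
         * task, mirroring the nm_g_variant_unref_floating() fragment. */
        if (parameters) {
            g_variant_ref_sink(parameters);
            g_variant_unref(parameters);
        }
        g_task_return_new_error(task,
                                G_IO_ERROR,
                                G_IO_ERROR_NOT_CONNECTED,
                                "D-Bus connection is gone");
        return;
    }

    /* g_dbus_connection_call() sinks the floating parameters itself. */
    g_dbus_connection_call(dbus_connection,
                           "org.freedesktop.NetworkManager",
                           object_path,
                           interface_name,
                           method_name,
                           parameters,
                           reply_type,
                           G_DBUS_CALL_FLAGS_NONE,
                           -1,
                           cancellable,
                           internal_callback,
                           g_steal_pointer(&task));
}
```
Presumably the reply is then unpacked in internal_callback, which completes
the task and thereby invokes the user's callback.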
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already strips the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long a plain `nmcli` invocation takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
        g_task_return_error(task, _nm_client_new_error_nm_not_cached());
        return;
    }
    priv = NM_CLIENT_GET_PRIVATE(self);

    if (!priv->name_owner) {
        if (parameters)
            nm_g_variant_unref_floating(parameters);
        g_task_return_error(task, _nm_client_new_error_nm_not_running());
        return;
    }

    _nm_client_dbus_call_simple(self,
                                cancellable,
                                object_path,
                                interface_name,
                                method_name,
                                parameters,
                                reply_type,
                                flags,
                                timeout_msec,
                                internal_callback,
                                g_steal_pointer(&task));
}

GVariant *
_nm_client_dbus_call_sync(NMClient *self,
                          GCancellable *cancellable,
                          const char *object_path,
                          const char *interface_name,
                          const char *method_name,
                          GVariant *parameters,
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Previously we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but the effort did not fundamentally change: you
still need about the same work to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used here).
- NMLDBusMetaIface and other metadata are static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them, because a mismatch won't compile (see the macro sketch after
this list).
- The metadata now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That only matters when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus; I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type; it didn't know the expected D-Bus type (see the guard sketch after
this list).
- Previously, GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, this prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). They are thus not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. Those should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify:devices", and finally "device-added".
This changes the behavior of commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and from the "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved (see the
usage sketch after this list).
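To illustrate the GMainContext rules above, here is a minimal sketch (my own
illustration, not code from this patch; everything except the GLib/libnm calls
is made up) of driving an NMClient from a worker thread with its own context:
```
#include <NetworkManager.h>

static void
client_ready(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GMainLoop *loop   = user_data;
    GError    *error  = NULL;
    NMClient  *client = nm_client_new_finish(result, &error);

    if (!client) {
        g_warning("failed to create NMClient: %s", error->message);
        g_clear_error(&error);
    } else {
        /* ... use the client; its callbacks arrive on this thread's context ... */
        g_object_unref(client);
    }
    g_main_loop_quit(loop);
}

static gpointer
worker_thread(gpointer user_data)
{
    /* the context that is thread-default while NMClient is constructed is
     * the one the instance sticks to for all its callbacks */
    GMainContext *context = g_main_context_new();
    GMainLoop    *loop;

    g_main_context_push_thread_default(context);
    loop = g_main_loop_new(context, FALSE);

    nm_client_new_async(NULL, client_ready, loop);
    g_main_loop_run(loop);

    g_main_loop_unref(loop);
    g_main_context_pop_thread_default(context);
    g_main_context_unref(context);
    return NULL;
}
```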
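The collect-then-commit idea from the "separate steps" bullet can be shown with
plain GObject API: g_object_freeze_notify() queues "notify" emissions until the
matching thaw, so all updates land before any signal fires. This is only an
analogy sketch, not the actual _dbus_handle_*() code:
```
#include <glib-object.h>

/* Apply a dict of changed properties ("a{sv}") without emitting any
 * "notify" signal mid-update; the queued notifications fire together at
 * thaw time, when the object is in a consistent state. */
static void
apply_properties(GObject *obj, GVariant *properties)
{
    GVariantIter iter;
    const char  *name;
    GVariant    *value;

    g_object_freeze_notify(obj);
    g_variant_iter_init(&iter, properties);
    while (g_variant_iter_next(&iter, "{&sv}", &name, &value)) {
        /* ... translate "name"/"value" and update the GObject property ... */
        g_variant_unref(value);
    }
    g_object_thaw_notify(obj);
}
```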
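The compile-time check mentioned for NML_DBUS_META_PROPERTY_INIT_U() can be
illustrated with a hypothetical macro (this is not the real definition; all
names here are made up). The pointer comparison inside sizeof() is never
evaluated, but the compiler diagnoses it unless the member really is a guint32,
matching the D-Bus type "u":
```
#include <glib.h>

typedef struct {
    const char *dbus_type;
    gsize       offset;
} ExamplePropertyMeta;

typedef struct {
    guint32 version_id;
} ExampleObject;

#define EXAMPLE_PROPERTY_INIT_U(ctype, field)                                    \
    {                                                                            \
        .dbus_type = "u",                                                        \
        .offset    = G_STRUCT_OFFSET(ctype, field)                               \
                     + 0 * sizeof(&((ctype *) NULL)->field == (guint32 *) NULL), \
    }

const ExamplePropertyMeta example_meta =
    EXAMPLE_PROPERTY_INIT_U(ExampleObject, version_id);
/* with a member that is not a guint32, the pointer comparison above draws a
 * "comparison of distinct pointer types" diagnostic */
```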
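For the "expected D-Bus types" bullet, the check boils down to a guard like the
following (a sketch under my own naming, not demarshal_generic() or the real
libnm code):
```
#include <glib.h>

/* Reject a property value whose D-Bus type does not match the expected
 * type before it can reach the GObject property. */
static gboolean
accept_property_value(const char         *property_name,
                      GVariant           *value,
                      const GVariantType *expected_type)
{
    if (!g_variant_is_of_type(value, expected_type)) {
        char *expected_str = g_variant_type_dup_string(expected_type);

        g_debug("property %s has D-Bus type \"%s\" but \"%s\" was expected: ignore",
                property_name,
                g_variant_get_type_string(value),
                expected_str);
        g_free(expected_str);
        return FALSE;
    }
    return TRUE;
}
```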
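Finally, a usage sketch for the visibility bullet (again only an illustration;
synchronous initialization for brevity): nm_client_get_connections() returns
only the profiles visible to the user, while a hidden profile may still be
handed out by nm_active_connection_get_connection():
```
#include <NetworkManager.h>

int
main(void)
{
    GError          *error = NULL;
    NMClient        *client;
    const GPtrArray *connections;
    guint            i;

    client = nm_client_new(NULL, &error);
    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_clear_error(&error);
        return 1;
    }

    /* profiles hidden via "connection.permissions" are not in this list */
    connections = nm_client_get_connections(client);
    for (i = 0; i < connections->len; i++) {
        NMRemoteConnection *remote = connections->pdata[i];

        g_print("%s\n", nm_connection_get_id(NM_CONNECTION(remote)));
    }

    g_object_unref(client);
    return 0;
}
```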
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
const GVariantType *reply_type,
GDBusCallFlags flags,
int timeout_msec,
gboolean strip_dbus_error,
GError **error)
{
    NMClientPrivate *priv;
    gs_unref_variant GVariant *ret = NULL;
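    /* sanity-check the caller-provided arguments */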
    nm_assert(!cancellable || G_IS_CANCELLABLE(cancellable));
    nm_assert(!error || !*error);
    nm_assert(object_path);
    nm_assert(interface_name);
    nm_assert(method_name);
    nm_assert(parameters);
    nm_assert(reply_type);
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to an NMClient goes through the caller's
GMainContext. This follows glib's style and effectively allows using NMClient
in a multi-threaded scenario (see the first sketch after this list). This
implies sticking to a main context chosen upon construction and ensuring
that callbacks are only invoked while iterating that context. Also, NMClient
itself shall never iterate the caller's context. This also means libnm must
never use g_idle_add() or g_timeout_add(), as those enqueue sources in the
g_main_context_default() context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, quite the contrary. That means NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()); the second sketch after this list
illustrates the batching idea. Note that, for example, during the initial
GetManagedObjects() reply, NMClient receives a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted at a
moment when the cache is consistent. The unavoidable downside is that when
you receive a property-changed signal, possibly many other properties have
changed already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should all be for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Previously, we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other metadata are static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them, because misuse won't compile.
- The metadata now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and its "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
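Two short sketches follow to illustrate the points above. They are not
part of the patch; function names like worker_thread() and apply_batch()
are made up for illustration, and only public GLib/libnm API is assumed.
First, the GMainContext rule: make a private context thread-default
before constructing the client, and iterate that context yourself:
```
/* Sketch: consume NMClient from a worker thread with its own GMainContext. */
#include <NetworkManager.h>

static gpointer
worker_thread(gpointer user_data)
{
    GMainContext *context = g_main_context_new();
    GError       *error   = NULL;
    NMClient     *client;

    /* NMClient sticks to the context that is thread-default at
     * construction time; push ours before creating it. */
    g_main_context_push_thread_default(context);

    client = nm_client_new(NULL, &error);
    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_clear_error(&error);
    } else {
        /* All callbacks and signals of this client are dispatched only
         * while this thread iterates @context. Runs until quit elsewhere. */
        GMainLoop *loop = g_main_loop_new(context, FALSE);

        g_main_loop_run(loop);
        g_main_loop_unref(loop);
        g_object_unref(client);
    }

    g_main_context_pop_thread_default(context);
    g_main_context_unref(context);
    return NULL;
}

int
main(void)
{
    GThread *t = g_thread_new("nm-worker", worker_thread, NULL);

    g_thread_join(t);
    return 0;
}
```
Second, the "apply all changes first, emit signals afterwards" idea in
miniature. NMClient implements this with its own change queues
(_dbus_handle_changes_commit()); the stock GObject freeze/thaw mechanism
below merely demonstrates the same batching pattern:
```
/* Sketch: apply a batch of property changes, then emit the collected
 * "notify" signals, so observers only ever see a consistent state. */
static void
apply_batch(GObject *obj, GHashTable *changed /* name -> GValue* */)
{
    GHashTableIter iter;
    gpointer       name;
    gpointer       value;

    g_object_freeze_notify(obj); /* queue notifications instead of emitting */
    g_hash_table_iter_init(&iter, changed);
    while (g_hash_table_iter_next(&iter, &name, &value))
        g_object_set_property(obj, name, value);
    g_object_thaw_notify(obj);   /* now emit them, duplicates coalesced */
}
```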
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
if (!self) {
|
|
|
|
|
nm_g_variant_unref_floating(parameters);
|
|
|
|
|
nm_g_set_error_take_lazy(error, _nm_client_new_error_nm_not_cached());
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
priv = NM_CLIENT_GET_PRIVATE(self);
|
|
|
|
|
if (!priv->name_owner) {
|
|
|
|
|
nm_g_variant_unref_floating(parameters);
|
|
|
|
|
nm_g_set_error_take_lazy(error, _nm_client_new_error_nm_not_running());
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
ret = g_dbus_connection_call_sync(priv->dbus_connection,
|
|
|
|
|
priv->name_owner,
|
|
|
|
|
object_path,
|
|
|
|
|
interface_name,
|
|
|
|
|
method_name,
|
|
|
|
|
parameters,
|
|
|
|
|
reply_type,
|
|
|
|
|
flags,
|
|
|
|
|
timeout_msec,
|
|
|
|
|
cancellable,
|
|
|
|
|
error);
|
|
|
|
|
if (!ret) {
|
|
|
|
|
if (error && strip_dbus_error)
|
|
|
|
|
g_dbus_error_strip_remote_error(*error);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check the RSS column
    in the `ps aux` output.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
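A minimal sketch for capturing just that column, assuming procps `ps` and a
single running `nmcli monitor` instance:
```
nmcli monitor >/dev/null 2>&1 &
sleep 2                       # give the client time to populate its cache
ps -o pid,rss,comm -C nmcli   # RSS is reported in kilobytes
kill %1
```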
    return g_steal_pointer(&ret);
}

/* Call a D-Bus method synchronously and discard the reply, which is
 * expected to be the empty tuple "()". Returns TRUE on success, or
 * FALSE with @error set. */
gboolean
_nm_client_dbus_call_sync_void(NMClient      *self,
                               GCancellable  *cancellable,
                               const char    *object_path,
                               const char    *interface_name,
                               const char    *method_name,
                               GVariant      *parameters,
                               GDBusCallFlags flags,
                               int            timeout_msec,
                               gboolean       strip_dbus_error,
                               GError       **error)
{
    gs_unref_variant GVariant *ret = NULL;

    ret = _nm_client_dbus_call_sync(self,
                                    cancellable,
                                    object_path,
                                    interface_name,
                                    method_name,
                                    parameters,
                                    G_VARIANT_TYPE("()"),
                                    flags,
                                    timeout_msec,
                                    strip_dbus_error,
                                    error);
    return !!ret;
}

void
_nm_client_set_property_sync_legacy(NMClient *self,
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long a plain `nmcli` invocation takes on such a system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check the RSS size
in `ps aux`.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
                                 const char *object_path,
                                 const char *interface_name,
                                 const char *property_name,
                                 const char *format_string,
                                 ...)
{
    NMClientPrivate *priv;
    GVariant        *val;
    gs_unref_variant GVariant *ret = NULL;
    va_list                    ap;

    nm_assert(!self || NM_IS_CLIENT(self));
    nm_assert(interface_name);
    nm_assert(property_name);
    nm_assert(format_string);

    if (!self)
        return;

    priv = NM_CLIENT_GET_PRIVATE(self);

    if (!priv->name_owner)
        return;

    va_start(ap, format_string);
    val = g_variant_new_va(format_string, NULL, &ap);
    va_end(ap);

    nm_assert(val);

    /* A synchronous D-Bus call that is not cancellable and ignores the return
     * value. This function only exists for backward compatibility. */
    ret = g_dbus_connection_call_sync(priv->dbus_connection,
                                      priv->name_owner,
                                      object_path,
                                      DBUS_INTERFACE_PROPERTIES,
                                      "Set",
                                      g_variant_new("(ssv)", interface_name, property_name, val),
                                      NULL,
                                      G_DBUS_CALL_FLAGS_NONE,
                                      2000,
                                      NULL,
                                      NULL);
}
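For comparison, the same "(ssv)" org.freedesktop.DBus.Properties.Set
pattern issued directly by an application. The bus name, interface, and
"Autoconnect" property exist on D-Bus; the helper itself is a hypothetical
sketch and not part of libnm:
```
#include <gio/gio.h>

static gboolean
set_device_autoconnect(GDBusConnection *bus,
                       const char      *device_path,
                       gboolean         value,
                       GError         **error)
{
    GVariant *ret;

    ret = g_dbus_connection_call_sync(bus,
                                      "org.freedesktop.NetworkManager",
                                      device_path,
                                      "org.freedesktop.DBus.Properties",
                                      "Set",
                                      g_variant_new("(ssv)",
                                                    "org.freedesktop.NetworkManager.Device",
                                                    "Autoconnect",
                                                    g_variant_new_boolean(value)),
                                      NULL,
                                      G_DBUS_CALL_FLAGS_NONE,
                                      2000,
                                      NULL,
                                      error);
    if (!ret)
        return FALSE;
    g_variant_unref(ret);
    return TRUE;
}
```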
/*****************************************************************************/

#define _assert_main_context_is_current_source(self, x_context)               \
    G_STMT_START                                                               \
    {                                                                          \
        if (NM_MORE_ASSERTS > 0) {                                             \
            GSource *_source = g_main_current_source();                        \
                                                                               \
            if (_source) {                                                     \
                NMClientPrivate *_priv = NM_CLIENT_GET_PRIVATE(self);          \
                                                                               \
                nm_assert(g_source_get_context(_source) == _priv->x_context);  \
                nm_assert(nm_g_main_context_can_acquire(_priv->x_context));    \
            }                                                                  \
        }                                                                      \
    }                                                                          \
    G_STMT_END
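From the client's perspective, the invariant guarded above looks roughly
like this sketch, assuming `user_data` carries the GMainContext that was
thread-default when the NMClient was created:
```
#include <gio/gio.h>

/* Sketch: any callback dispatched by NMClient should run from a source
 * attached to the client's own context. */
static void
on_client_callback(GObject *source_object, GAsyncResult *result, gpointer user_data)
{
    GMainContext *expected = user_data; /* captured at setup time (assumption) */

    g_assert(g_source_get_context(g_main_current_source()) == expected);

    /* ... normal result handling would go here ... */
}
```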
#define _assert_main_context_is_current_thread_default(self, x_context)       \
    G_STMT_START                                                               \
    {                                                                          \
        if (NM_MORE_ASSERTS > 0) {                                             \
            NMClientPrivate *_priv = NM_CLIENT_GET_PRIVATE(self);              \
                                                                               \
            nm_assert((g_main_context_get_thread_default() ?: g_main_context_default()) \
                      == _priv->x_context);                                    \
            nm_assert(nm_g_main_context_can_acquire(_priv->x_context));        \
        }                                                                      \
    }                                                                          \
    G_STMT_END
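Both macros encode the GMainContext contract from the CHANGES section:
NMClient sticks to the context that is thread-default at construction.
A minimal sketch of that pattern (the helper name is illustrative):
```
#include <NetworkManager.h>

static NMClient *
client_new_on_context(GMainContext *context, GError **error)
{
    NMClient *client;

    /* The client binds to the context that is thread-default here; its
     * callbacks fire only while this context is iterated. */
    g_main_context_push_thread_default(context);
    client = nm_client_new(NULL, error);
    g_main_context_pop_thread_default(context);

    return client;
}
```
The owning thread then drives the client by iterating the context, e.g.
with g_main_context_iteration(context, TRUE) in a loop.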
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also, previously we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other metadata are static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure that the property types match. It's pretty hard
to misuse them, because misuse won't compile.
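For illustration, one common way to build such a compile-time check in C
(an assumed sketch, not the actual NML_DBUS_META_PROPERTY_INIT_U()
definition): comparing the field's address against a pointer of the
expected type makes the compiler reject a mismatching field (an error
with -Werror):
```
#include <glib.h>

typedef struct {
    guint32 version;
    char   *name;
} ExampleObject; /* hypothetical struct, for illustration only */

/* Yields the field offset, but only compiles cleanly when 'field'
 * really is a guint32. */
#define EXAMPLE_OFFSET_U(type, field) \
    (G_STRUCT_OFFSET(type, field)     \
     + 0 * sizeof((&((type *) NULL)->field) == (const guint32 *) NULL))

int
main(void)
{
    g_print("offset=%u\n", (guint) EXAMPLE_OFFSET_U(ExampleObject, version));
    /* EXAMPLE_OFFSET_U(ExampleObject, name) would warn: char ** vs guint32 *. */
    return 0;
}
```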
- The metadata now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type; it didn't know the expected D-Bus type.
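The kind of check this buys, as a minimal sketch in plain GLib
(set_version_property() is a hypothetical helper, not libnm's demarshal
code):
```
#include <gio/gio.h>

/* Accept a D-Bus property value only if it has the expected type "u". */
static gboolean
set_version_property(GVariant *value, guint32 *out_version)
{
    if (!g_variant_is_of_type(value, G_VARIANT_TYPE_UINT32))
        return FALSE; /* server exposed an unexpected type; reject it */

    *out_version = g_variant_get_uint32(value);
    return TRUE;
}
```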
- Previously, GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). These are thus not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. Those should be fixed first.
- Note that this changes the order in which we emit "notify::devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify::devices", and finally "device-added"
signals.
This changes behavior compared to commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and its "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
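A minimal sketch of enumerating the visible set with public API:
```
#include <NetworkManager.h>

/* Print the connections that NMClient exposes; profiles hidden via
 * "connection.permissions" do not show up in this list. */
static void
print_visible_connections(NMClient *client)
{
    const GPtrArray *connections = nm_client_get_connections(client);
    guint            i;

    for (i = 0; i < connections->len; i++) {
        NMRemoteConnection *remote = connections->pdata[i];

        g_print("%s (%s)\n",
                nm_connection_get_id(NM_CONNECTION(remote)),
                nm_connection_get_uuid(NM_CONNECTION(remote)));
    }
}
```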
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    } \
    G_STMT_END

/*****************************************************************************/

void
_nm_client_queue_notify_object(NMClient *self, gpointer nmobj, const GParamSpec *pspec)
{
    NMObjectBase *base;

    nm_assert(NM_IS_CLIENT(self));
    nm_assert(NM_IS_OBJECT(nmobj) || NM_IS_CLIENT(nmobj));

    base = (NMObjectBase *) nmobj;

    if (base->is_disposing) {
        /* Don't emit property changed signals once the NMClient
         * instance is about to shut down. */
        nm_assert(nmobj == self);
        return;
    }

    if (c_list_is_empty(&base->queue_notify_lst)) {
        /* First queued notification for this object: take a reference and
         * freeze its notifications until the queued changes are emitted. */
        c_list_link_tail(&NM_CLIENT_GET_PRIVATE(self)->queue_notify_lst_head,
                         &base->queue_notify_lst);
        g_object_ref(nmobj);
        g_object_freeze_notify(nmobj);
    }

    if (pspec)
        g_object_notify_by_pspec(nmobj, (GParamSpec *) pspec);
}

/*****************************************************************************/

gpointer
_nm_client_notify_event_queue(NMClient *self,
                              int priority,
                              NMClientNotifyEventCb callback,
                              gsize event_size)
{
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
    NMClientNotifyEvent *notify_event;

    nm_assert(callback);
    nm_assert(event_size > sizeof(NMClientNotifyEvent));

    /* The caller requests a larger, derived event struct; fill the common
     * header and enqueue it, the caller initializes the rest. */
    notify_event = g_malloc(event_size);
    notify_event->priority = priority;
    notify_event->callback = callback;
    c_list_link_tail(&priv->notify_event_lst_head, &notify_event->lst);
    priv->notify_event_lst_changed = TRUE;
    return notify_event;
}
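
/* Illustrative caller pattern (hypothetical names; the callback signature
 * and the event type below are assumptions for illustration, not part of
 * the original source). Callers embed NMClientNotifyEvent as the first
 * member of a larger struct, queue it with the size of the derived
 * struct, and fill their payload through the returned pointer:
 *
 *     typedef struct {
 *         NMClientNotifyEvent parent;
 *         char               *ifname;
 *     } ExampleNotifyEvent;
 *
 *     static void
 *     example_notify_cb(NMClient *self, gpointer notify_event)
 *     {
 *         ExampleNotifyEvent *event = notify_event;
 *
 *         g_print("changed: %s\n", event->ifname);
 *         g_free(event->ifname);
 *     }
 *
 *     ExampleNotifyEvent *event;
 *
 *     event = _nm_client_notify_event_queue(self,
 *                                           priority,
 *                                           example_notify_cb,
 *                                           sizeof(ExampleNotifyEvent));
 *     event->ifname = g_strdup("eth0");
 */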

NMClientNotifyEventWithPtr *
_nm_client_notify_event_queue_with_ptr(NMClient *self,
                                       int priority,
                                       NMClientNotifyEventWithPtrCb callback,
                                       gpointer user_data)
{
    NMClientNotifyEventWithPtr *notify_event;

    notify_event = _nm_client_notify_event_queue(self,
                                                 priority,
                                                 (NMClientNotifyEventCb) callback,
                                                 sizeof(NMClientNotifyEventWithPtr));
    notify_event->user_data = user_data;
    return notify_event;
}
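A hypothetical caller of this wrapper might look as follows; my_notify_cb() and the priority are invented for illustration, and the sketch assumes NMClientNotifyEventWithPtrCb takes (NMClient *, gpointer), in the same style as _nm_client_notify_event_queue_emit_obj_signal_cb() below.
```
static void
my_notify_cb(NMClient *self, gpointer notify_event)
{
    NMClientNotifyEventWithPtr *event = notify_event;

    /* the user_data pointer stashed at queue time */
    g_print("notified, user_data=%p\n", event->user_data);
}

static void
my_queue_with_ptr_example(NMClient *self, gpointer user_data)
{
    _nm_client_notify_event_queue_with_ptr(self,
                                           0 /* priority, hypothetical */,
                                           my_notify_cb,
                                           user_data);
}
```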

/*****************************************************************************/

/* event used to defer an object's "added"/"removed" signal until the
 * cache is in a consistent state. */
typedef struct {
    NMClientNotifyEvent parent;
    GObject  *source;
    NMObject *obj;
    guint signal_id;
} NMClientNotifyEventObjAddedRemove;

static void
_nm_client_notify_event_queue_emit_obj_signal_cb(NMClient *self, gpointer notify_event_base)
{
    NMClientNotifyEventObjAddedRemove *notify_event = notify_event_base;

    NML_NMCLIENT_LOG_T(
        self,
        "[%s] emit \"%s\" signal for %s",
        NM_IS_CLIENT(notify_event->source) ? "nmclient" : _nm_object_get_path(notify_event->source),
        g_signal_name(notify_event->signal_id),
        _nm_object_get_path(notify_event->obj));

    nm_assert(NM_IS_OBJECT(notify_event->source) || NM_IS_CLIENT(notify_event->source));

    g_signal_emit(notify_event->source, notify_event->signal_id, 0, notify_event->obj);

    /* drop the references taken when the event was queued */
    g_object_unref(notify_event->obj);
    g_object_unref(notify_event->source);
}
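For context, the g_signal_emit() above is what a libnm consumer ultimately observes as an ordinary GObject signal, delivered in the GMainContext the client belongs to and only after the cache has been brought to a consistent state. A minimal, self-contained sketch of the consuming side (the handler body and the bare main loop are illustrative, not part of this file):
```
#include <NetworkManager.h>

static void
on_device_added(NMClient *client, NMDevice *device, gpointer user_data)
{
    /* runs only after NMClient has committed all pending cache changes,
     * so the cache is consistent at this point */
    g_print("device added: %s\n", nm_device_get_iface(device));
}

int
main(void)
{
    GError   *error  = NULL;
    NMClient *client = nm_client_new(NULL, &error);

    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    g_signal_connect(client, "device-added", G_CALLBACK(on_device_added), NULL);
    g_main_loop_run(g_main_loop_new(NULL, FALSE));
    return 0;
}
```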

void
_nm_client_notify_event_queue_emit_obj_signal(NMClient *self,
                                              GObject *source,
                                              NMObject *nmobj,
                                              gboolean is_added /* or else removed */,
                                              int prio_offset,
                                              guint signal_id)
{
    NMClientNotifyEventObjAddedRemove *notify_event;
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
nm_assert(prio_offset >= 0);
|
|
|
|
|
nm_assert(prio_offset < 20);
|
|
|
|
|
nm_assert(NM_IS_OBJECT(source) || NM_IS_CLIENT(source));
|
|
|
|
|
nm_assert(NM_IS_OBJECT(nmobj));
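/* The asserted range 0 <= prio_offset < 20 matches the +/-20 window that
 * the priority computation below reserves around PRIO_BEFORE/PRIO_AFTER. */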
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
if (((NMObjectBase *) source)->is_disposing) {
|
|
|
|
|
nm_assert(NM_IS_CLIENT(source));
|
|
|
|
|
return;
|
|
|
|
|
}
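/* While the source is disposing (then necessarily the NMClient itself,
 * per the assertion above), no further object signals get queued. */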
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
notify_event = _nm_client_notify_event_queue(
|
|
|
|
|
self,
|
|
|
|
|
is_added ? NM_CLIENT_NOTIFY_EVENT_PRIO_AFTER - 20 + prio_offset
|
|
|
|
|
: NM_CLIENT_NOTIFY_EVENT_PRIO_BEFORE + 20 - prio_offset,
|
|
|
|
|
_nm_client_notify_event_queue_emit_obj_signal_cb,
|
|
|
|
|
sizeof(NMClientNotifyEventObjAddedRemove));
|
|
|
|
|
notify_event->source = g_object_ref(source);
|
|
|
|
|
notify_event->obj = g_object_ref(nmobj);
|
|
|
|
|
notify_event->signal_id = signal_id;
|
|
|
|
|
}
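/* The emission order described in the commit message falls out of these
 * priorities: "removed" events sort at PRIO_BEFORE + 20 - prio_offset,
 * before property notifications, while "added" events sort at
 * PRIO_AFTER - 20 + prio_offset, after them; hence device-removed,
 * then notify, then device-added. */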
|
|
|
|
|
|
|
|
|
|
/*****************************************************************************/
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
_nm_client_notify_event_cmp(const CList *a, const CList *b, const void *user_data)
|
|
|
|
|
{
|
|
|
|
|
NM_CMP_DIRECT(c_list_entry(a, NMClientNotifyEvent, lst)->priority,
|
|
|
|
|
c_list_entry(b, NMClientNotifyEvent, lst)->priority);
|
|
|
|
|
return 0;
|
|
|
|
|
}
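/* Returning 0 on equal priorities keeps ties in enqueue order, assuming
 * the list sort applied to the event queue is stable. */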
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
_nm_client_notify_event_emit_parts(NMClient *self, int max_priority /* included! */)
|
|
|
|
|
{
|
2021-11-09 13:28:54 +01:00
|
|
|
NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
|
2019-10-30 11:42:58 +01:00
|
|
|
NMClientNotifyEvent *notify_event;
|
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
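A minimal client-side sketch of the consistency guarantee mentioned above (illustrative only, not part of this patch; it uses the public libnm API with a hypothetical handler name):
```c
#include <NetworkManager.h>

/* Because NMClient commits all cache updates before emitting signals, a
 * notify handler may read related properties and see consistent state. */
static void
on_device_state_changed(GObject *obj, GParamSpec *pspec, gpointer user_data)
{
    NMDevice           *device = NM_DEVICE(obj);
    NMActiveConnection *ac     = nm_device_get_active_connection(device);

    /* "ac" already reflects the same D-Bus update that triggered this
     * notification; no half-applied state is visible here. */
    g_print("%s: state=%d, active=%s\n",
            nm_device_get_iface(device),
            (int) nm_device_get_state(device),
            ac ? nm_active_connection_get_id(ac) : "(none)");
}

/* usage: g_signal_connect(device, "notify::state",
 *                         G_CALLBACK(on_device_state_changed), NULL); */
```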
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    while (TRUE) {
        if (priv->notify_event_lst_changed) {
            priv->notify_event_lst_changed = FALSE;
            c_list_sort(&priv->notify_event_lst_head, _nm_client_notify_event_cmp, NULL);
        }

        notify_event = c_list_first_entry(&priv->notify_event_lst_head, NMClientNotifyEvent, lst);
        if (!notify_event)
            return;
        if (notify_event->priority > max_priority)
            return;

        c_list_unlink_stale(&notify_event->lst);
        notify_event->callback(self, notify_event);
        g_free(notify_event);
    }
}

static void
_nm_client_notify_event_emit(NMClient *self)
{
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
    NMObjectBase    *base;

    _nm_client_notify_event_emit_parts(self, NM_CLIENT_NOTIFY_EVENT_PRIO_GPROP);

    while (
        (base = c_list_first_entry(&priv->queue_notify_lst_head, NMObjectBase, queue_notify_lst))) {
        c_list_unlink(&base->queue_notify_lst);
        g_object_thaw_notify(G_OBJECT(base));
        g_object_unref(base);
    }

    _nm_client_notify_event_emit_parts(self, G_MAXINT);
}

/*****************************************************************************/

GDBusConnection *
_nm_client_get_dbus_connection(NMClient *self)
{
    return NM_CLIENT_GET_PRIVATE(self)->dbus_connection;
}

const char *
_nm_client_get_dbus_name_owner(NMClient *self)
{
    return NM_CLIENT_GET_PRIVATE(self)->name_owner;
}

GMainContext *
_nm_client_get_context_main(NMClient *self)
{
    return NM_CLIENT_GET_PRIVATE(self)->main_context;
}

GMainContext *
_nm_client_get_context_dbus(NMClient *self)
{
    return NM_CLIENT_GET_PRIVATE(self)->dbus_context;
}
/**
 * nm_client_get_main_context:
 * @self: the #NMClient instance
 *
 * The #NMClient instance is permanently associated with the current
 * thread-default #GMainContext, referenced at the time the instance
 * was created. To receive events, the user must iterate this context
 * and can use it to synchronize access to the client.
 *
 * Note that even after the #NMClient instance got destroyed, there might
 * still be pending sources registered in the context. That means, to fully
 * clean up, the user must continue iterating the context as long as
 * the nm_client_get_context_busy_watcher() object is alive.
 *
 * Returns: (transfer none): the #GMainContext of the client.
 *
 * Since: 1.22
 */
GMainContext *
nm_client_get_main_context(NMClient *self)
{
    g_return_val_if_fail(NM_IS_CLIENT(self), NULL);

    return _nm_client_get_context_main(self);
}
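As an illustrative usage sketch (not part of this file): create the client while the desired context is the thread default, then iterate that same context to receive events.
```c
#include <NetworkManager.h>

static void
example_run(void)
{
    GError   *error  = NULL;
    NMClient *client = nm_client_new(NULL, &error); /* synchronous init */

    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_clear_error(&error);
        return;
    }

    /* Dispatch whatever is pending; a real program would run a GMainLoop
     * on nm_client_get_main_context(client) instead. */
    while (g_main_context_iteration(nm_client_get_main_context(client), FALSE))
        ;

    g_object_unref(client);
}
```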
libnm: fix leaking internal GMainContext for synchronously initialized NMClient
NMClient makes asynchronous D-Bus calls via g_dbus_connection_call().
This references the current GMainContext to later invoke the
asynchronous callback. Even when we cancel the asynchronous call,
the callback will still be invoked (later) to complete the request.
In particular this means when we destroy (unref) an NMClient, there
are quite possibly pending requests in the GMainContext. Although they
are cancelled, they keep the GMainContext alive.
With synchronous initialization, we have an internal GMainContext.
When we destroy the NMClient, we cannot just unhook the integrated
source; instead, we need to keep it integrated in the caller's main
context as long as there are pending requests.
Add a mechanism to track those pending requests and fix the leak for the
internal GMainContext. Also expose the same mechanism to the user via a new
API called nm_client_get_context_busy_watcher(). This allows the user
to know when it can stop iterating the main context and when all
resources are reclaimed.
For example the following will lead to a crash:
for i in range(1,2000):
nmc = NM.Client.new(None)
This creates a number of NMClient instances and destroys them again.
Note that here the GMainContext is never iterated, because
synchronous initialization does not iterate the caller's context. So,
while we correctly unref and dispose the created NMClient instances,
there are pending requests left in the inner GMainContext. These pile
up and soon the program will crash because it runs out of file descriptors.
We can have a similar problem with asynchronous initialization, when
we create a new GMainContext per client, and don't iterate it after
we are done with the client.
Note that this patch does not avoid the problem in general. The problem
cannot be avoided; the user must iterate the main context at some point.
Otherwise resources (memory and file descriptors) will be leaked.
Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/merge_requests/347
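To make the failure mode concrete, here is a minimal C analogue of the Python loop above (an illustrative sketch using the public libnm API, not code from this commit):
```c
#include <NetworkManager.h>

int
main(void)
{
    int i;

    for (i = 0; i < 2000; i++) {
        NMClient *c = nm_client_new(NULL, NULL); /* synchronous init */

        g_clear_object(&c);
        /* Without iterating the context until the busy watcher is gone,
         * each iteration leaks an internal GMainContext; the process
         * eventually runs out of file descriptors. */
    }
    return 0;
}
```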
/**
 * nm_client_get_context_busy_watcher:
 * @self: the NMClient instance.
 *
 * Returns: (transfer none): a GObject that stays alive as long as there are pending
 *   D-Bus operations.
 *
 * NMClient will schedule asynchronous D-Bus requests which will complete on
 * the GMainContext associated with the instance. When destroying the NMClient
 * instance, those requests are cancelled right away, however their callbacks are
 * still outstanding and queued in the GMainContext. These outstanding callbacks
 * keep the GMainContext alive. In order to fully release all resources,
 * the user must keep iterating the main context until all these callbacks
 * are handled. Of course, at this point no more actual callbacks will be invoked
 * for the user; those are all cancelled internally.
 *
 * This just leaves one problem: how long does the user need to keep the
 * GMainContext running to ensure everything is cleaned up? The answer is
 * this GObject. Subscribe a weak reference to the returned object and keep
 * iterating the main context until the object got unreferenced.
 *
 * Note that after the NMClient instance gets destroyed, all outstanding operations
 * will be cancelled right away. That means the user needs to iterate the #GMainContext
 * a bit longer, but it is guaranteed that the cleanup happens soon after.
 *
 * The way to use the context busy watcher is to register a weak pointer to
 * see when it gets destroyed. That means user code should not take additional
 * references on this object, which would keep it alive longer.
 *
 * If you plan to exit the program after releasing the NMClient instance,
 * you may not need to worry about these "leaks". Also, if you plan to continue
 * iterating the #GMainContext afterwards anyway, then you don't need to care
 * when exactly NMClient is gone completely.
 *
 * Since: 1.22
 */
GObject *
nm_client_get_context_busy_watcher(NMClient *self)
{
    GObject *w;

    g_return_val_if_fail(NM_IS_CLIENT(self), NULL);

    w = NM_CLIENT_GET_PRIVATE(self)->context_busy_watcher;
    return g_object_get_qdata(w, nm_context_busy_watcher_quark()) ?: w;
}
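The weak-pointer pattern described in the comment above could look like the following; a hedged sketch, assuming the caller holds the last reference to the client:
```c
#include <NetworkManager.h>

/* Drain the context until the busy watcher is finalized, so that all
 * cancelled D-Bus callbacks complete and their resources are released. */
static void
example_shutdown(NMClient *client)
{
    GMainContext *ctx     = g_main_context_ref(nm_client_get_main_context(client));
    GObject      *watcher = nm_client_get_context_busy_watcher(client);

    g_object_add_weak_pointer(watcher, (gpointer *) &watcher);
    g_object_unref(client);

    while (watcher)
        g_main_context_iteration(ctx, TRUE);

    g_main_context_unref(ctx);
}
```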
struct udev *
_nm_client_get_udev(NMClient *self)
{
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);

    if (G_UNLIKELY(!priv->udev_inited)) {
        priv->udev_inited = TRUE;
        /* for testing, we don't want to use udev in libnm. */
        if (!nm_streq0(g_getenv("LIBNM_USE_NO_UDEV"), "1"))
            priv->udev = udev_new();
    }

    return priv->udev;
}

/*****************************************************************************/

static void
_ASSERT_dbobj(NMLDBusObject *dbobj, NMClient *self)
{
#if NM_MORE_ASSERTS > 5
    nm_assert(NM_IS_CLIENT(self));
    nm_assert(NML_IS_DBUS_OBJECT(dbobj));
    nm_assert(dbobj == g_hash_table_lookup(NM_CLIENT_GET_PRIVATE(self)->dbus_objects, dbobj));
#endif
}

static NMLDBusObject *
nml_dbus_object_new(NMRefString *dbus_path_take)
{
    NMLDBusObject *dbobj;

    nm_assert(NM_IS_REF_STRING(dbus_path_take));

    dbobj  = g_slice_new(NMLDBusObject);
    *dbobj = (NMLDBusObject) {
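        /* Note (not in the original source): C_LIST_INIT() makes each
         * embedded CList self-linked, i.e. an empty list head or unlinked
         * node; referencing "dbobj" inside the compound literal is fine
         * because the pointer itself was assigned just above. */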
        .dbus_path        = g_steal_pointer(&dbus_path_take),
        .ref_count        = 1,
        .dbus_objects_lst = C_LIST_INIT(dbobj->dbus_objects_lst),
        .iface_lst_head   = C_LIST_INIT(dbobj->iface_lst_head),
        .watcher_lst_head = C_LIST_INIT(dbobj->watcher_lst_head),
        .obj_changed_lst  = C_LIST_INIT(dbobj->obj_changed_lst),
        .obj_state        = NML_DBUS_OBJ_STATE_UNLINKED,
    };
    return dbobj;
}
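
/* NMLDBusObject is reference counted manually, without GObject. Note that
 * the increment in nml_dbus_object_ref() below is a plain, non-atomic
 * "ref_count++": taking and dropping references is only safe from a single
 * context at a time. A usage sketch follows nml_dbus_object_unref(). */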

NMLDBusObject *
nml_dbus_object_ref(NMLDBusObject *dbobj)
{
    nm_assert(dbobj);
    nm_assert(dbobj->ref_count > 0);

    dbobj->ref_count++;
    return dbobj;
}

void
nml_dbus_object_unref(NMLDBusObject *dbobj)
{
    nm_assert(dbobj);
    nm_assert(dbobj->ref_count > 0);

    if (--dbobj->ref_count > 0)
        return;

    nm_assert(c_list_is_empty(&dbobj->obj_changed_lst));
    nm_assert(c_list_is_empty(&dbobj->iface_lst_head));
    nm_assert(c_list_is_empty(&dbobj->watcher_lst_head));
    nm_assert(!dbobj->nmobj);

    nm_ref_string_unref(dbobj->dbus_path);
    nm_g_slice_free(dbobj);
}
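
/* A minimal usage sketch (illustrative only; "some_dbobj" is a hypothetical
 * pointer obtained from the cache, not a name used in this file):
 *
 *   NMLDBusObject *dbobj;
 *
 *   dbobj = nml_dbus_object_ref(some_dbobj);
 *   ... dbobj stays alive here, even if the cache drops its reference ...
 *   nml_dbus_object_unref(dbobj);
 *
 * Note that the final unref asserts the object is already unlinked from all
 * lists and detached from its NMObject; that is, teardown must happen before
 * the last reference is dropped, not as a side effect of it. */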
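
/* Find the NMLDBusObjIfaceData that @dbobj already tracks for
 * @dbus_iface_name, optionally creating a missing entry (per @allow_create). */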
static NMLDBusObjIfaceData *
nml_dbus_object_iface_data_get(NMLDBusObject *dbobj,
                               const char    *dbus_iface_name,
                               gboolean       allow_create)
{
    const NMLDBusMetaIface *meta_iface;
    NMLDBusObjIfaceData    *db_iface_data;
    NMLDBusObjPropData     *db_prop_data;
    guint                   count = 0;
    guint                   i;

    nm_assert(NML_IS_DBUS_OBJECT(dbobj));
    nm_assert(dbus_iface_name);

#if NM_MORE_ASSERTS > 10
    {
        gboolean expect_well_known = TRUE;
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
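A scripted variant of this check, instead of reading the RSS column off
`ps aux` by hand; the 2 second settle time is an arbitrary assumption:
```
# start the monitor in the background, give the cache a moment to
# populate, then read the resident set size (kB) from /proc
nmcli monitor >/dev/null &
sleep 2
awk '/^VmRSS:/ { print $2, $3 }' "/proc/$!/status"
kill $!
```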
2019-10-30 11:42:58 +01:00
|
|
|
/* all well-known interfaces must come first in the list. */
|
|
|
|
|
c_list_for_each_entry (db_iface_data, &dbobj->iface_lst_head, iface_lst) {
|
|
|
|
|
if (db_iface_data->dbus_iface_is_wellknown == expect_well_known)
|
|
|
|
|
continue;
|
|
|
|
|
nm_assert(expect_well_known);
|
|
|
|
|
expect_well_known = FALSE;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
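            /* look up the static meta data for the interface name; well-known
             * interfaces have an entry here, unknown ones yield NULL. */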
meta_iface = nml_dbus_meta_iface_get(dbus_iface_name);
|
|
|
|
|
if (meta_iface) {
|
|
|
|
|
c_list_for_each_entry (db_iface_data, &dbobj->iface_lst_head, iface_lst) {
|
|
|
|
|
if (!db_iface_data->dbus_iface_is_wellknown)
|
|
|
|
|
break;
|
|
|
|
|
if (db_iface_data->iface_removed)
|
|
|
|
|
continue;
|
|
|
|
|
if (db_iface_data->dbus_iface.meta == meta_iface)
|
|
|
|
|
return db_iface_data;
|
|
|
|
|
count++;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
2020-11-04 17:26:50 +01:00
|
|
|
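            /* unknown interface: such entries are kept at the tail of the list,
             * so walk backwards and match by name. */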
c_list_for_each_entry_prev (db_iface_data, &dbobj->iface_lst_head, iface_lst) {
|
2019-10-30 11:42:58 +01:00
|
|
|
if (db_iface_data->dbus_iface_is_wellknown)
|
|
|
|
|
break;
|
|
|
|
|
if (db_iface_data->iface_removed)
|
|
|
|
|
continue;
|
|
|
|
|
if (nm_streq(db_iface_data->dbus_iface.name->str, dbus_iface_name))
|
|
|
|
|
return db_iface_data;
|
|
|
|
|
count++;
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-10-30 11:42:58 +01:00
|
|
|
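            /* no matching interface data found; bail out unless the caller
             * wants a new entry created. */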
if (!allow_create)
|
|
|
|
|
return NULL;
|
2019-10-30 11:42:58 +01:00
|
|
|
if (count > 20) {
|
|
|
|
|
/* We track the list of interfaces that an object has in a linked list.
|
|
|
|
|
                 * That is efficient and convenient if we assume that each object only has a small
|
|
|
|
|
* number of interfaces (which very much should be the case). Here, something is very
|
|
|
|
|
                 * odd; maybe there is a bug or the server side is misbehaving. Anyway, error out. */
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long a plain `nmcli` call takes on such a system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    db_iface_data = g_malloc(
        G_STRUCT_OFFSET(NMLDBusObjIfaceData, prop_datas)
        + (meta_iface ? (sizeof(NMLDBusObjPropData) * meta_iface->n_dbus_properties) : 0u));

    if (meta_iface) {
        *db_iface_data = (NMLDBusObjIfaceData) {
            .dbus_iface.meta         = meta_iface,
            .dbus_iface_is_wellknown = TRUE,
            .changed_prop_lst_head   = C_LIST_INIT(db_iface_data->changed_prop_lst_head),
            .iface_removed           = FALSE,
        };
        db_prop_data = &db_iface_data->prop_datas[0];
        for (i = 0; i < meta_iface->n_dbus_properties; i++, db_prop_data++) {
            *db_prop_data = (NMLDBusObjPropData) {
                .prop_data_value  = NULL,
                .changed_prop_lst = C_LIST_INIT(db_prop_data->changed_prop_lst),
            };
        }
        c_list_link_front(&dbobj->iface_lst_head, &db_iface_data->iface_lst);
    } else {
        /* Intentionally don't initialize the other fields. We are not supposed
         * to touch them, and a valgrind warning would be preferable. */
        db_iface_data->dbus_iface.name         = nm_ref_string_new(dbus_iface_name);
        db_iface_data->dbus_iface_is_wellknown = FALSE;
        db_iface_data->iface_removed           = FALSE;
        c_list_link_tail(&dbobj->iface_lst_head, &db_iface_data->iface_lst);
    }

    return db_iface_data;
}

static void
nml_dbus_obj_iface_data_destroy(NMLDBusObjIfaceData *db_iface_data)
{
    guint i;

    nm_assert(db_iface_data);
    nm_assert(c_list_is_empty(&db_iface_data->iface_lst));

    if (db_iface_data->dbus_iface_is_wellknown) {
        for (i = 0; i < db_iface_data->dbus_iface.meta->n_dbus_properties; i++)
            nm_g_variant_unref(db_iface_data->prop_datas[i].prop_data_value);
    } else
        nm_ref_string_unref(db_iface_data->dbus_iface.name);

    g_free(db_iface_data);
}

gpointer
nml_dbus_object_get_property_location(NMLDBusObject *dbobj,
                                      const NMLDBusMetaIface *meta_iface,
                                      const NMLDBusMetaProperty *meta_property)
{
    char *target_c;

    target_c = ((gpointer) dbobj->nmobj);
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Previously we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change: you still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying it around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them, because a mismatch simply won't compile (sketched below).
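How such a compile-time check can be constructed is sketched here; this is a
toy, not the real NML_DBUS_META_PROPERTY_INIT_U() definition:
```c
#include <glib.h>

/* Sketch, not the real libnm macro: a property-table initializer that fails
 * to compile when the C field's size does not match the D-Bus "u" (guint32)
 * type. The negative array size is rejected by the compiler on mismatch. */
typedef struct {
    const char *dbus_type; /* expected D-Bus signature, e.g. "u" */
    gsize       offset;    /* offset of the C field within the object struct */
} MetaProperty;

#define META_PROPERTY_INIT_U(ctype, field)                                 \
    {                                                                      \
        .dbus_type = "u",                                                  \
        .offset    = G_STRUCT_OFFSET(ctype, field)                         \
                     + 0 * sizeof(char[sizeof(((ctype *) NULL)->field)     \
                                               == sizeof(guint32)          \
                                           ? 1                             \
                                           : -1]),                         \
    }

typedef struct {
    guint32 version; /* matches "u"; declaring this as char* would not compile */
} ExampleObject;

static const MetaProperty example_properties[] = {
    META_PROPERTY_INIT_U(ExampleObject, version),
};
```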
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus, but I don't think this was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type; it didn't know the expected D-Bus type.
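Conceptually, the check boils down to validating a variant's type before
unpacking it. A sketch (not the actual demarshalling code):
```c
#include <glib.h>

/* Sketch: refuse a property value whose D-Bus type does not match what the
 * meta data expects, instead of trusting the server. */
static gboolean
demarshal_u32_checked(GVariant *value, guint32 *out_val)
{
    if (!g_variant_is_of_type(value, G_VARIANT_TYPE_UINT32))
        return FALSE; /* unexpected type on D-Bus: log and ignore, don't crash */

    *out_val = g_variant_get_uint32(value);
    return TRUE;
}
```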
- Previously, GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, they prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Hence these are not common assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. Those should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify:devices", and finally "device-added".
This changes behavior relative to commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and from the "connection-added"/"connection-removed"
signals; see the example below.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
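For illustration, a client iterating the visible profiles through the public
API never sees the hidden ones. A sketch (error handling omitted):
```c
#include <NetworkManager.h>

/* Sketch: list the profiles NMClient exposes. Profiles hidden via
 * "connection.permissions" never show up here, though they may still be
 * reachable via nm_active_connection_get_connection(). */
static void
print_visible_connections(NMClient *client)
{
    const GPtrArray *connections = nm_client_get_connections(client);
    guint            i;

    for (i = 0; i < connections->len; i++) {
        NMRemoteConnection *remote = g_ptr_array_index(connections, i);

        g_print("%s\n", nm_connection_get_id(NM_CONNECTION(remote)));
    }
}
```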
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long a plain `nmcli` invocation takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    /* If set, dereference the pointer stored at base_struct_offset to reach
     * the actual target struct. */
    if (meta_iface->base_struct_offset > 0)
        target_c = *NM_CAST_ALIGN(gpointer, &target_c[meta_iface->base_struct_offset]);
    return &target_c[meta_property->prop_struct_offset];
}

static void
nml_dbus_object_set_obj_state(NMLDBusObject *dbobj, NMLDBusObjState obj_state, NMClient *self)
{
    NMClientPrivate *priv;

    nm_assert(NM_IS_CLIENT(self));
    nm_assert(NML_IS_DBUS_OBJECT(dbobj));

#if NM_MORE_ASSERTS > 10
    /* Check that the object is currently linked on the list that corresponds
     * to its current state. */
    priv = NM_CLIENT_GET_PRIVATE(self);
    switch (dbobj->obj_state) {
    case NML_DBUS_OBJ_STATE_UNLINKED:
        nm_assert(c_list_is_empty(&dbobj->dbus_objects_lst));
        break;
    case NML_DBUS_OBJ_STATE_WATCHED_ONLY:
        nm_assert(
            c_list_contains(&priv->dbus_objects_lst_head_watched_only, &dbobj->dbus_objects_lst));
        break;
    case NML_DBUS_OBJ_STATE_ON_DBUS:
        nm_assert(c_list_contains(&priv->dbus_objects_lst_head_on_dbus, &dbobj->dbus_objects_lst));
        break;
    case NML_DBUS_OBJ_STATE_WITH_NMOBJ_NOT_READY:
        nm_assert(c_list_contains(&priv->dbus_objects_lst_head_with_nmobj_not_ready,
                                  &dbobj->dbus_objects_lst));
        break;
    case NML_DBUS_OBJ_STATE_WITH_NMOBJ_READY:
        nm_assert(c_list_contains(&priv->dbus_objects_lst_head_with_nmobj_ready,
                                  &dbobj->dbus_objects_lst));
        break;
    }
#endif

    if (dbobj->obj_state == obj_state)
        return;
    NML_NMCLIENT_LOG_T(self,
                       "[%s]: set D-Bus object state %s",
                       dbobj->dbus_path->str,
                       nml_dbus_obj_state_to_string(obj_state));
2019-10-30 11:42:58 +01:00
|
|
|
    priv = NM_CLIENT_GET_PRIVATE(self);

    dbobj->obj_state = obj_state;

    switch (obj_state) {
    case NML_DBUS_OBJ_STATE_UNLINKED:
        c_list_unlink(&dbobj->dbus_objects_lst);
        c_list_unlink(&dbobj->obj_changed_lst);
        dbobj->obj_changed_type = NML_DBUS_OBJ_CHANGED_TYPE_NONE;
        break;
    case NML_DBUS_OBJ_STATE_WATCHED_ONLY:
        nm_c_list_move_tail(&priv->dbus_objects_lst_head_watched_only, &dbobj->dbus_objects_lst);
        break;
    case NML_DBUS_OBJ_STATE_ON_DBUS:
        nm_c_list_move_tail(&priv->dbus_objects_lst_head_on_dbus, &dbobj->dbus_objects_lst);
        break;
    case NML_DBUS_OBJ_STATE_WITH_NMOBJ_NOT_READY:
        nm_c_list_move_tail(&priv->dbus_objects_lst_head_with_nmobj_not_ready,
                            &dbobj->dbus_objects_lst);
        break;
    case NML_DBUS_OBJ_STATE_WITH_NMOBJ_READY:
        nm_c_list_move_tail(&priv->dbus_objects_lst_head_with_nmobj_ready,
                            &dbobj->dbus_objects_lst);
        break;
    default:
        nm_assert_not_reached();
    }
}
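The switch above is the entire cost of a state transition: each NMLDBusObject sits on exactly one per-state intrusive list, so changing state is a constant-time unlink plus link-tail. The following is a minimal, self-contained sketch of that pattern; the names (ListNode, Obj, obj_set_state) are hypothetical and stand in for the real c-list/nm-c-list helpers, not libnm's actual API.
```
/* Sketch: per-state intrusive lists with O(1) relinking (illustrative only). */
#include <assert.h>
#include <stdio.h>

typedef struct ListNode {
    struct ListNode *prev;
    struct ListNode *next;
} ListNode;

static void
list_init(ListNode *head)
{
    head->prev = head;
    head->next = head;
}

static void
list_unlink(ListNode *n)
{
    n->prev->next = n->next;
    n->next->prev = n->prev;
    n->prev       = n;
    n->next       = n;
}

static void
list_link_tail(ListNode *head, ListNode *n)
{
    n->prev          = head->prev;
    n->next          = head;
    head->prev->next = n;
    head->prev       = n;
}

typedef enum { STATE_WATCHED_ONLY, STATE_ON_DBUS, _STATE_NUM } State;

typedef struct {
    ListNode lst; /* links the object into exactly one per-state list */
    State    state;
} Obj;

static void
obj_set_state(ListNode heads[], Obj *obj, State state)
{
    /* O(1) move: drop from the old per-state list, append to the new one. */
    list_unlink(&obj->lst);
    list_link_tail(&heads[state], &obj->lst);
    obj->state = state;
}

int
main(void)
{
    ListNode heads[_STATE_NUM];
    Obj      o = {.state = STATE_WATCHED_ONLY};
    int      i;

    for (i = 0; i < _STATE_NUM; i++)
        list_init(&heads[i]);
    list_init(&o.lst);
    list_link_tail(&heads[STATE_WATCHED_ONLY], &o.lst);

    obj_set_state(heads, &o, STATE_ON_DBUS);
    assert(heads[STATE_ON_DBUS].next == &o.lst);
    assert(heads[STATE_WATCHED_ONLY].next == &heads[STATE_WATCHED_ONLY]);
    printf("moved to ON_DBUS list\n");
    return 0;
}
```
Because the node is embedded in the object, no allocation happens on a state change and membership in the old list is dropped implicitly, which is what keeps this bookkeeping cheap even with hundreds of cached D-Bus objects.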

/*****************************************************************************/

static void
nml_dbus_object_obj_changed_link(NMClient *self,
                                 NMLDBusObject *dbobj,
                                 NMLDBusObjChangedType changed_type)
{
    nm_assert(NM_IS_CLIENT(self));
    nm_assert(NML_IS_DBUS_OBJECT(dbobj));
    nm_assert(changed_type != NML_DBUS_OBJ_CHANGED_TYPE_NONE);

    /* Links @dbobj in the "obj_changed_lst", with the new "changed_type". */
    if (!NM_FLAGS_ALL((NMLDBusObjChangedType) dbobj->obj_changed_type, changed_type))
        NML_NMCLIENT_LOG_T(self,
                           "[%s]: changed-type 0x%02x linked",
                           dbobj->dbus_path->str,
                           (guint) changed_type);

    if (dbobj->obj_changed_type == NML_DBUS_OBJ_CHANGED_TYPE_NONE) {
        NMClientPrivate *priv;

        /* We are setting the first changed-type flag on this object, so it
         * still needs to be queued on the changed list. */
        nm_assert(c_list_is_empty(&dbobj->obj_changed_lst));
        priv = NM_CLIENT_GET_PRIVATE(self);
        c_list_link_tail(&priv->obj_changed_lst_head, &dbobj->obj_changed_lst);
    } else {
        /* The object already has change flags set and thus must be linked
         * already. Note that the list it is on may be
         * priv->obj_changed_lst_head, or a temporary list on the stack.
         *
         * This dance with the temporary list is done to ensure that we can
         * enqueue objects while we process the changes. */
        nm_assert(!c_list_is_empty(&dbobj->obj_changed_lst));
    }

    dbobj->obj_changed_type |= changed_type;

    nm_assert(NM_FLAGS_ALL(dbobj->obj_changed_type, changed_type));
}
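The "temporary list on the stack" mentioned in the comment deserves a concrete illustration: before processing, the committer splices the entire changed list onto a stack-local head, so handlers invoked during processing may call nml_dbus_object_obj_changed_link() and re-enqueue objects onto the real head without disturbing the ongoing iteration. The sketch below reuses the hypothetical ListNode helpers from the earlier sketch; changes_commit is an illustrative stand-in under those assumptions, not libnm's actual _dbus_handle_changes_commit().
```
/* Sketch: drain a snapshot of the changed list while allowing re-enqueue
 * (illustrative only; reuses the hypothetical list helpers above). */
static void
changes_commit(ListNode *changed_head)
{
    ListNode work;

    /* Steal all currently queued entries into a stack-local list. */
    list_init(&work);
    if (changed_head->next != changed_head) {
        work.next       = changed_head->next;
        work.prev       = changed_head->prev;
        work.next->prev = &work;
        work.prev->next = &work;
        list_init(changed_head);
    }

    /* Process the snapshot; anything enqueued while we run lands on
     * changed_head again and is picked up by the next commit round. */
    while (work.next != &work) {
        ListNode *n = work.next;

        list_unlink(n);
        /* ... update the cached object / emit its queued signals ... */
    }
}
```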

static NMLDBusObjChangedType
nml_dbus_object_obj_changed_consume(NMClient *self,
                                    NMLDBusObject *dbobj,
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
                                      NMLDBusObjChangedType changed_type)
{
    NMClientPrivate *priv;
    NMLDBusObjChangedType changed_type_res;

    /* We have @dbobj which has some "obj_changed_type" set (consequently,
     * it's linked in the "obj_changed_lst"). Here we consume the @changed_type,
     * meaning, we clear those flags from "obj_changed_type" (and return
     * the flags that were cleared/present, or NONE if the current object
     * doesn't have these changed-types). */
    nm_assert(NM_IS_CLIENT(self));
    nm_assert(NML_IS_DBUS_OBJECT(dbobj));
    nm_assert(changed_type != NML_DBUS_OBJ_CHANGED_TYPE_NONE);
    nm_assert(dbobj->obj_changed_type != NML_DBUS_OBJ_CHANGED_TYPE_NONE);
    nm_assert(!c_list_is_empty(&dbobj->obj_changed_lst));

    changed_type_res = dbobj->obj_changed_type & changed_type;

    dbobj->obj_changed_type &= ~changed_type;

    if (dbobj->obj_changed_type == NML_DBUS_OBJ_CHANGED_TYPE_NONE) {
        /* No other "obj_changed_type" left. Unlink the object from the
         * "changed_type_list". */
        c_list_unlink(&dbobj->obj_changed_lst);

        nm_assert(changed_type_res != NML_DBUS_OBJ_CHANGED_TYPE_NONE);
    }

    NML_NMCLIENT_LOG_T(self,
                       "[%s]: changed-type 0x%02x consumed",
                       dbobj->dbus_path->str,
                       (guint) changed_type_res);
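/* Aside (illustration only, not part of this file): the consume pattern
 * above, reduced to a self-contained sketch. All "demo_*" names are
 * hypothetical; the point is the technique of masking out the requested
 * flags and unlinking the entry once no flags remain. */
#include <assert.h>
#include <stdio.h>

typedef unsigned int demo_changed_type;

typedef struct {
    demo_changed_type flags;  /* stands in for dbobj->obj_changed_type */
    int               linked; /* stands in for the c_list membership */
} demo_obj;

static demo_changed_type
demo_consume(demo_obj *obj, demo_changed_type requested)
{
    demo_changed_type res;

    assert(requested != 0);
    assert(obj->flags != 0);
    assert(obj->linked);

    res = obj->flags & requested;
    obj->flags &= ~requested;

    if (obj->flags == 0) {
        /* Nothing left to process: drop the entry from the pending list.
         * In this branch "res" cannot be 0, because all previously set
         * flags were covered by "requested". */
        obj->linked = 0;
        assert(res != 0);
    }
    return res;
}

int
main(void)
{
    demo_obj          obj = {.flags = 0x3u, .linked = 1};
    demo_changed_type res;

    res = demo_consume(&obj, 0x1u);
    printf("consumed 0x%x, linked=%d\n", res, obj.linked);
    res = demo_consume(&obj, 0x2u);
    printf("consumed 0x%x, linked=%d\n", res, obj.linked);
    return 0;
}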
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously, GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first. A shell example follows
this list.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify:devices", and finally "device-added"
signals.
This changes the behavior introduced by commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden by NMClient's
nm_client_get_connections() and by the "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
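Below is a toy model of the three-step processing described above (unpack,
apply, commit). Everything in it is invented for illustration; it is not
libnm's actual API, which operates on NMLDBusObject instances and GObject
signals. It only shows why listeners always observe a consistent cache.
```
/* Toy sketch of the three-phase flow; names and types are invented.
 * Build with: cc toy.c $(pkg-config --cflags --libs glib-2.0) */
#include <glib.h>

typedef struct {
    const char *path;
    int         value;   /* the cached, externally visible state */
    int         pending; /* state unpacked from a "message" */
    gboolean    dirty;
} ToyObj;

/* Phase 1: unpack the message and record the change; nothing visible yet. */
static void
phase1_unpack(ToyObj *obj, int new_value)
{
    obj->pending = new_value;
    obj->dirty   = TRUE;
}

/* Phase 2: update every cached instance, still without emitting signals. */
static void
phase2_apply(ToyObj *objs, guint n)
{
    guint i;

    for (i = 0; i < n; i++) {
        if (objs[i].dirty)
            objs[i].value = objs[i].pending;
    }
}

/* Phase 3: the cache is now consistent; only now notify listeners. */
static void
phase3_commit(ToyObj *objs, guint n)
{
    guint i;

    for (i = 0; i < n; i++) {
        if (objs[i].dirty) {
            g_print("notify %s -> %d\n", objs[i].path, objs[i].value);
            objs[i].dirty = FALSE;
        }
    }
}

int
main(void)
{
    ToyObj objs[] = {{"/obj/1", 0, 0, FALSE}, {"/obj/2", 0, 0, FALSE}};

    /* One GetManagedObjects()-style burst touching several objects: */
    phase1_unpack(&objs[0], 7);
    phase1_unpack(&objs[1], 9);
    phase2_apply(objs, G_N_ELEMENTS(objs));
    phase3_commit(objs, G_N_ELEMENTS(objs));
    return 0;
}
```
By the time the first "notify" fires, both objects already hold their new
values, which mirrors the cache-consistency guarantee described above.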
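The compile-time check mentioned for NML_DBUS_META_PROPERTY_INIT_U() can be
illustrated with a simplified, hypothetical macro. This is not libnm's actual
definition, and checking sizeof() is weaker than the real checks, but it
shows the principle: misuse fails at build time.
```
#include <glib.h>

typedef struct {
    guint32 version_id;
} ExampleObject;

/* Hypothetical sketch, not libnm's macro: G_STATIC_ASSERT_EXPR() breaks the
 * build unless the field looks like a guint32 (D-Bus type "u"); the comma
 * expression then yields the field's offset. */
#define EXAMPLE_OFFSET_CHECKED_U(type, field)                                  \
    (G_STATIC_ASSERT_EXPR(sizeof(((type *) NULL)->field) == sizeof(guint32)), \
     G_STRUCT_OFFSET(type, field))

int
main(void)
{
    /* Compiles; with a differently sized field it would not. */
    g_print("offset=%d\n", (int) EXAMPLE_OFFSET_CHECKED_U(ExampleObject, version_id));
    return 0;
}
```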
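And for reference, arming the soft assertions from a shell looks like this,
using nmcli as an example libnm client (the variable values are exactly the
ones described above):
```
# verbose tracing only:
LIBNM_CLIENT_DEBUG=trace nmcli device status

# promote libnm's debug messages to g_warning()/g_critical() and make
# warnings fatal, effectively turning them into assertions:
LIBNM_CLIENT_DEBUG=warning,error G_DEBUG=fatal-warnings nmcli device status
```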
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long a plain `nmcli` call takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
        return changed_type_res;
    }

    priv = NM_CLIENT_GET_PRIVATE(self);

    /* Actually, at this point, @dbobj is not linked in priv->obj_changed_lst_head,
     * instead, it's linked on a temporary list. As we still have changes left after
     * consuming "changed_type", we move it to priv->obj_changed_lst_head. */
    nm_assert(!c_list_contains(&priv->obj_changed_lst_head, &dbobj->obj_changed_lst));
    nm_c_list_move_tail(&priv->obj_changed_lst_head, &dbobj->obj_changed_lst);

    NML_NMCLIENT_LOG_T(self,
                       "[%s]: changed-type 0x%02x consumed (still has 0x%02x)",
                       dbobj->dbus_path->str,
                       (guint) changed_type_res,
                       (guint) dbobj->obj_changed_type);
    return changed_type_res;
}

/* Returns whether any cached object still has pending changes of the
 * given @changed_type. */
static gboolean
nml_dbus_object_obj_changed_any_linked(NMClient *self, NMLDBusObjChangedType changed_type)
{
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
    NMLDBusObject   *dbobj;
2019-10-30 11:42:58 +01:00
|
|
|
|
|
|
|
|
nm_assert(changed_type != NML_DBUS_OBJ_CHANGED_TYPE_NONE);
|
|
|
|
|
|
|
|
|
|
c_list_for_each_entry (dbobj, &priv->obj_changed_lst_head, obj_changed_lst) {
|
|
|
|
|
nm_assert(dbobj->obj_changed_type != NML_DBUS_OBJ_CHANGED_TYPE_NONE);
|
|
|
|
|
if (NM_FLAGS_ANY(dbobj->obj_changed_type, changed_type))
|
|
|
|
|
return TRUE;
|
|
|
|
|
}
|
|
|
|
|
return FALSE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*****************************************************************************/
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
_dbobjs_notify_watchers_for_dbobj(NMClient *self, NMLDBusObject *dbobj)
|
|
|
|
|
{
|
|
|
|
|
NMLDBusObjWatcher *obj_watcher;
|
|
|
|
|
NMLDBusObjWatcher *obj_watcher_safe;
|
|
|
|
|
|
|
|
|
|
c_list_for_each_entry_safe (obj_watcher,
|
|
|
|
|
obj_watcher_safe,
|
|
|
|
|
&dbobj->watcher_lst_head,
|
|
|
|
|
_priv.watcher_lst)
|
|
|
|
|
obj_watcher->_priv.notify_fcn(self, obj_watcher);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static gboolean
|
|
|
|
|
_dbobjs_check_dbobj_ready(NMClient *self, NMLDBusObject *dbobj)
|
|
|
|
|
{
|
|
|
|
|
nm_assert(NM_IS_CLIENT(self));
|
|
|
|
|
nm_assert(NML_IS_DBUS_OBJECT(dbobj));
|
|
|
|
|
nm_assert(G_IS_OBJECT(dbobj->nmobj));
|
|
|
|
|
nm_assert(NM_IS_OBJECT(dbobj->nmobj) || NM_IS_CLIENT(dbobj->nmobj));
|
|
|
|
|
nm_assert(NM_IN_SET((NMLDBusObjState) dbobj->obj_state,
|
|
|
|
|
NML_DBUS_OBJ_STATE_WITH_NMOBJ_NOT_READY,
|
|
|
|
|
NML_DBUS_OBJ_STATE_WITH_NMOBJ_READY));
|
|
|
|
|
|
|
|
|
|
if (G_LIKELY(dbobj->obj_state == NML_DBUS_OBJ_STATE_WITH_NMOBJ_READY))
|
|
|
|
|
return TRUE;
|
|
|
|
|
|
|
|
|
|
if (!NM_OBJECT_GET_CLASS(dbobj->nmobj)->is_ready(NM_OBJECT(dbobj->nmobj)))
|
|
|
|
|
return FALSE;
|
|
|
|
|
|
|
|
|
|
nml_dbus_object_set_obj_state(dbobj, NML_DBUS_OBJ_STATE_WITH_NMOBJ_READY, self);
|
|
|
|
|
|
|
|
|
|
nml_dbus_object_obj_changed_link(self, dbobj, NML_DBUS_OBJ_CHANGED_TYPE_NMOBJ);
|
|
|
|
|
_dbobjs_notify_watchers_for_dbobj(self, dbobj);
|
|
|
|
|
|
|
|
|
|
return TRUE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
_nm_client_notify_object_changed(NMClient *self, NMLDBusObject *dbobj)
|
|
|
|
|
{
|
|
|
|
|
nml_dbus_object_obj_changed_link(self, dbobj, NML_DBUS_OBJ_CHANGED_TYPE_NMOBJ);
|
|
|
|
|
_dbobjs_notify_watchers_for_dbobj(self, dbobj);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*****************************************************************************/
|
|
|
|
|
|
|
|
|
|
static NMLDBusObject *
|
|
|
|
|
_dbobjs_dbobj_get_r(NMClient *self, NMRefString *dbus_path_r)
|
|
|
|
|
{
|
|
|
|
|
nm_assert(NM_IS_REF_STRING(dbus_path_r));
|
|
|
|
|
|
|
|
|
|
return g_hash_table_lookup(NM_CLIENT_GET_PRIVATE(self)->dbus_objects, &dbus_path_r);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static NMLDBusObject *
_dbobjs_dbobj_get_s(NMClient *self, const char *dbus_path)
{
    nm_auto_ref_string NMRefString *dbus_path_r = NULL;

    nm_assert(dbus_path);
    dbus_path_r = nm_ref_string_new(dbus_path);
    return _dbobjs_dbobj_get_r(self, dbus_path_r);
}
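
/* Create a new NMLDBusObject for @dbus_path_take (ownership of the
 * ref-string is transferred, per the "_take" suffix) and register it in the
 * dbus_objects table. The caller must ensure no object exists for that path
 * yet. */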
static NMLDBusObject *
_dbobjs_dbobj_create(NMClient *self, NMRefString *dbus_path_take)
{
    nm_auto_ref_string NMRefString *dbus_path = g_steal_pointer(&dbus_path_take);
    NMClientPrivate                *priv      = NM_CLIENT_GET_PRIVATE(self);
    NMLDBusObject                  *dbobj;

    nm_assert(!_dbobjs_dbobj_get_r(self, dbus_path));

    dbobj = nml_dbus_object_new(g_steal_pointer(&dbus_path));
    if (!g_hash_table_add(priv->dbus_objects, dbobj))
        nm_assert_not_reached();
    return dbobj;
}
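
/* Return the cached NMLDBusObject for @dbus_path_take, creating it if it
 * does not exist yet. Ownership of the ref-string is transferred: it is
 * either released here or handed on to _dbobjs_dbobj_create(). */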
static NMLDBusObject *
_dbobjs_dbobj_get_or_create(NMClient *self, NMRefString *dbus_path_take)
{
    nm_auto_ref_string NMRefString *dbus_path = g_steal_pointer(&dbus_path_take);
    NMLDBusObject                  *dbobj;

    dbobj = _dbobjs_dbobj_get_r(self, dbus_path);
    if (dbobj)
        return dbobj;

    return _dbobjs_dbobj_create(self, g_steal_pointer(&dbus_path));
}
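
/* Resolve @dbus_path to a cached object whose public GObject (nmobj) exists
 * and, unless @gtype is G_TYPE_NONE, is an instance of @gtype. Returns NULL
 * otherwise. */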
static NMLDBusObject *
_dbobjs_get_nmobj(NMClient *self, const char *dbus_path, GType gtype)
{
    NMLDBusObject *dbobj;

    nm_assert(gtype == G_TYPE_NONE || g_type_is_a(gtype, NM_TYPE_OBJECT));

    dbobj = _dbobjs_dbobj_get_s(self, dbus_path);

    if (!dbobj)
        return NULL;
    if (!dbobj->nmobj)
        return NULL;

    if (gtype != G_TYPE_NONE && !g_type_is_a(G_OBJECT_TYPE(dbobj->nmobj), gtype))
        return NULL;

    return dbobj;
}
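
/* Like _dbobjs_get_nmobj(), but additionally requires the object to be in
 * the ready state and unpacks the result to the nmobj pointer itself. That
 * is, objects that are not yet fully initialized are treated as not
 * present. */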
static gpointer
_dbobjs_get_nmobj_unpack_visible(NMClient *self, const char *dbus_path, GType gtype)
{
    NMLDBusObject *dbobj;

    dbobj = _dbobjs_get_nmobj(self, dbus_path, gtype);
    if (!dbobj)
        return NULL;
    if (dbobj->obj_state != NML_DBUS_OBJ_STATE_WITH_NMOBJ_READY)
        return NULL;
    return dbobj->nmobj;
}

/*****************************************************************************/

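/* Register a watcher on @dbobj. The watcher is allocated here: the caller
 * passes @struct_size of a user struct that embeds NMLDBusObjWatcher as its
 * first member, so additional per-watcher state can travel with it. A
 * hypothetical user (all names invented for illustration) might do:
 *
 *     typedef struct {
 *         NMLDBusObjWatcher parent;
 *         int               my_state;
 *     } MyWatchData;
 *
 *     MyWatchData *w = _dbobjs_obj_watcher_register_o(self,
 *                                                     dbobj,
 *                                                     my_notify_fcn,
 *                                                     sizeof(MyWatchData));
 *
 * my_notify_fcn(self, watcher) is then invoked whenever the object's
 * watchers are notified. */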
static gpointer
_dbobjs_obj_watcher_register_o(NMClient                *self,
                               NMLDBusObject           *dbobj,
                               NMLDBusObjWatchNotifyFcn notify_fcn,
                               gsize                    struct_size)
{
    NMLDBusObjWatcher *obj_watcher;

    nm_assert(NM_IS_CLIENT(self));
    _ASSERT_dbobj(dbobj, self);
    nm_assert(notify_fcn);
    nm_assert(struct_size > sizeof(NMLDBusObjWatcher));

    obj_watcher                   = g_malloc(struct_size);
    obj_watcher->dbobj            = dbobj;
    obj_watcher->_priv.notify_fcn = notify_fcn;

    /* We must enqueue the item at the front of the list because, while
     * invoking notify_fcn(), we iterate the watchers front-to-end. As we
     * want to allow the callee to register new watchers and to unregister
     * itself, linking at the front is the right way to do it. */
    c_list_link_front(&dbobj->watcher_lst_head, &obj_watcher->_priv.watcher_lst);

    return obj_watcher;
}
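
/* Variant of the above that is addressed by D-Bus path instead of an
 * already-resolved NMLDBusObject; ownership of @dbus_path_take is
 * transferred, following the "_take" convention used above. */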
static gpointer
_dbobjs_obj_watcher_register_r(NMClient                *self,
                               NMRefString             *dbus_path_take,
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
    of profiles (551 profiles and 515 devices, mostly unrealized). This
    setup is already at the edge of what NetworkManager can currently
    handle. Of course, that is a different issue. Here we just check how
    long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
                               NMLDBusObjWatchNotifyFcn notify_fcn,
                               gsize                    struct_size)
{
    nm_auto_ref_string NMRefString *dbus_path = g_steal_pointer(&dbus_path_take);
    NMLDBusObject                  *dbobj;
    nm_assert(NM_IS_CLIENT(self));
    nm_assert(notify_fcn);

    dbobj = _dbobjs_dbobj_get_or_create(self, g_steal_pointer(&dbus_path));

    if (dbobj->obj_state == NML_DBUS_OBJ_STATE_UNLINKED)
        nml_dbus_object_set_obj_state(dbobj, NML_DBUS_OBJ_STATE_WATCHED_ONLY, self);

    return _dbobjs_obj_watcher_register_o(self, dbobj, notify_fcn, struct_size);
}

static void
_dbobjs_obj_watcher_unregister(NMClient *self, gpointer obj_watcher_base)
{
    NMLDBusObjWatcher *obj_watcher = obj_watcher_base;
    NMLDBusObject     *dbobj;
    nm_assert(NM_IS_CLIENT(self));
    nm_assert(obj_watcher);
    nm_assert(NML_IS_DBUS_OBJECT(obj_watcher->dbobj));
    nm_assert(g_hash_table_lookup(NM_CLIENT_GET_PRIVATE(self)->dbus_objects, obj_watcher->dbobj)
              == obj_watcher->dbobj);
    nm_assert(
        c_list_contains(&obj_watcher->dbobj->watcher_lst_head, &obj_watcher->_priv.watcher_lst));

    c_list_unlink(&obj_watcher->_priv.watcher_lst);

    dbobj = obj_watcher->dbobj;

    g_free(obj_watcher);

    /* If this was the last watcher and no D-Bus interface keeps the object
     * alive, drop it from the cache. */
    if (c_list_is_empty(&dbobj->iface_lst_head) && c_list_is_empty(&dbobj->watcher_lst_head)) {
        NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);

        NML_NMCLIENT_LOG_T(self, "[%s]: drop D-Bus watcher", dbobj->dbus_path->str);
        nml_dbus_object_set_obj_state(dbobj, NML_DBUS_OBJ_STATE_UNLINKED, self);
        if (!g_hash_table_steal(priv->dbus_objects, dbobj))
            nm_assert_not_reached();
        nml_dbus_object_unref(dbobj);
    }
}

/*****************************************************************************/

/* Watcher data for an object-path ("o") property. */
typedef struct {
    NMLDBusObjWatcher parent;
    NMLDBusPropertyO *pr_o;
} PropertyOData;

gpointer
nml_dbus_property_o_get_obj(NMLDBusPropertyO *pr_o)
{
    nm_assert(!pr_o->nmobj || nml_dbus_property_o_is_ready(pr_o));

    return pr_o->nmobj;
}

gboolean
nml_dbus_property_o_is_ready(const NMLDBusPropertyO *pr_o)
{
    return pr_o->is_ready || !pr_o->owner_dbobj;
}
libnm: hide NMActiveConnection until NMRemoteConnection is ready
Generally, libnm's NMClient cache only wants to expose NMObjects that
are fully initialized. Most objects don't require anything special,
except NMRemoteConnection, which waits for the GetSettings() call to complete.
NMObjects reference each other. For example, NMActiveConnection
references NMDevice and NMRemoteConnection. There is a desire that an
object is only ready if the objects that it references are ready too.
In practice that is not done, because usually every object references
other objects; that would mean all objects are declared non-ready
as long as any of them is still initializing. That does not seem
desirable. Instead, most objects (except NMRemoteConnection and now
NMActiveConnection) are considered ready and visible once their first
notification completes. In case an object references any object that is
not yet ready, the reference is NULL (but the source object is visible
already). This is also done to cope with cycles where
objects reference each other. In practice, such cycles should not be
exposed by NetworkManager. However, libnm should be robust against that.
This has the undesired effect that when you call AddAndActivate(), the
NMActiveConnection might already be visible while its
NMRemoteConnection isn't. That means ac.get_connection() will
initially return NULL until the remote connection becomes ready.
Hence, add special handling so that an NMActiveConnection waits for its
NMRemoteConnection to be ready before being ready itself.
Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')
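A hedged sketch of the client-side behavior this fixes (not code from this
patch; activated_cb is an invented callback, while the libnm calls themselves
are real API):
```
#include <NetworkManager.h>

/* Invented callback for nm_client_add_and_activate_connection_async().
 * Before this fix, the NMActiveConnection could become visible while its
 * NMRemoteConnection was still fetching settings, so the getter below
 * could return NULL right after activation. */
static void
activated_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError             *error = NULL;
    NMActiveConnection *ac;

    ac = nm_client_add_and_activate_connection_finish(NM_CLIENT(source), result, &error);
    if (!ac) {
        g_printerr("activation failed: %s\n", error->message);
        g_error_free(error);
        return;
    }

    if (!nm_active_connection_get_connection(ac)) {
        /* Profile not ready yet; a robust client watches
         * "notify::connection" until the property is set. */
    }

    g_object_unref(ac);
}
```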
gboolean
nml_dbus_property_o_is_ready_fully(const NMLDBusPropertyO *pr_o)
{
    return !pr_o->owner_dbobj || !pr_o->obj_watcher || pr_o->nmobj;
}
static void
nml_dbus_property_o_notify_changed(NMLDBusPropertyO *pr_o, NMClient *self)
{
    const NMLDBusPropertVTableO *vtable;
    GObject                     *nmobj = NULL;
    gboolean                     is_ready = TRUE;
    gboolean                     changed_ready;
    GType                        gtype;

    nm_assert(pr_o);
    nm_assert(NM_IS_CLIENT(self));

    /* Without an owning D-Bus object there is nothing to resolve or
     * notify. */
    if (!pr_o->owner_dbobj)
        return;
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
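Summary: maximum RSS drops from 34456 kbytes ([1]) and 34452 kbytes ([2]) to 29196 kbytes in [3], a roughly 15% reduction.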
---
N6) Same setup as N4), but run `nmcli monitor` and check the RSS size in `ps aux`.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
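For context on what N4) and N5) time: a plain `nmcli` invocation creates an NMClient synchronously, which blocks until the whole GetManagedObjects() reply has been unpacked into the client-side cache, and only then prints. A minimal sketch of that pattern (the file name and build line are illustrative and not part of the test setup above; `nm_client_new()` and the getters are the regular libnm API):
```
/* bench-client.c -- build: cc bench-client.c $(pkg-config --cflags --libs libnm) */
#include <NetworkManager.h>

int
main(void)
{
    GError   *error  = NULL;
    NMClient *client;

    /* Synchronous creation: blocks until the full GetManagedObjects()
     * reply is unpacked into the client-side object cache. */
    client = nm_client_new(NULL, &error);
    if (!client) {
        g_printerr("error: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    /* Reading the cache afterwards is cheap; the initial D-Bus dump
     * above dominates, and that is what perf/time measure. */
    g_print("%u devices, %u connections\n",
            nm_client_get_devices(client)->len,
            nm_client_get_connections(client)->len);

    g_object_unref(client);
    return 0;
}
```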
2019-10-30 11:42:58 +01:00
    if (!pr_o->is_changed) {
        if (pr_o->is_ready)
            return;
        goto done;
    }
    pr_o->is_changed = FALSE;
    if (!pr_o->obj_watcher)
        goto done;
    if (!pr_o->obj_watcher->dbobj->nmobj) {
        if (pr_o->obj_watcher->dbobj->obj_state >= NML_DBUS_OBJ_STATE_ON_DBUS) {
            NML_NMCLIENT_LOG_W(
                self,
                "[%s]: property %s references %s but object is not created",
                pr_o->owner_dbobj->dbus_path->str,
                pr_o->meta_iface->dbus_properties[pr_o->dbus_property_idx].dbus_property_name,
                pr_o->obj_watcher->dbobj->dbus_path->str);
        } else {
            NML_NMCLIENT_LOG_E(
                self,
                "[%s]: property %s references %s but object is not present on D-Bus",
                pr_o->owner_dbobj->dbus_path->str,
                pr_o->meta_iface->dbus_properties[pr_o->dbus_property_idx].dbus_property_name,
                pr_o->obj_watcher->dbobj->dbus_path->str);
        }
        goto done;
    }
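The NML_NMCLIENT_LOG_W()/NML_NMCLIENT_LOG_E() calls above are the client-side logging described in the commit message: silent by default, visible with LIBNM_CLIENT_DEBUG=trace, and promotable to g_warning()/g_critical() via LIBNM_CLIENT_DEBUG=warning,error, which G_DEBUG=fatal-warnings then turns into assertion failures.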
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4, but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4, but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
vtable = pr_o->meta_iface->dbus_properties[pr_o->dbus_property_idx].extra.property_vtable_o;
gtype  = vtable->get_o_type_fcn();

/* The referenced object must be of the GObject type that the property's
 * meta data expects; otherwise log an error and bail out. */
if (!g_type_is_a(G_OBJECT_TYPE(pr_o->obj_watcher->dbobj->nmobj), gtype)) {
    NML_NMCLIENT_LOG_E(
        self,
        "[%s]: property %s references %s with unexpected GObject type %s instead of %s",
        pr_o->owner_dbobj->dbus_path->str,
        pr_o->meta_iface->dbus_properties[pr_o->dbus_property_idx].dbus_property_name,
        pr_o->obj_watcher->dbobj->dbus_path->str,
        G_OBJECT_TYPE_NAME(pr_o->obj_watcher->dbobj->nmobj),
        g_type_name(gtype));
    goto done;
}
/* A property that references its own owner object is bogus; log a
 * warning and resolve the reference to the owner's nmobj. */
if (pr_o->obj_watcher->dbobj == pr_o->owner_dbobj) {
    NML_NMCLIENT_LOG_W(
        self,
        "[%s]: property %s references itself",
        pr_o->owner_dbobj->dbus_path->str,
        pr_o->meta_iface->dbus_properties[pr_o->dbus_property_idx].dbus_property_name);
    nmobj = pr_o->owner_dbobj->nmobj;
    goto done;
}
/* Suppress this property's changed-state while checking whether the
 * referenced object is ready, so the check does not loop back here. */
pr_o->block_is_changed = TRUE;
is_ready               = _dbobjs_check_dbobj_ready(self, pr_o->obj_watcher->dbobj);
pr_o->block_is_changed = FALSE;
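/* For illustration only (not part of this file): the g_type_is_a() guard
 * above is the generic GObject pattern for validating an instance against
 * an expected type. A minimal standalone sketch: */
static gboolean
example_check_expected_type(GObject *obj, GType expected_gtype)
{
    if (!g_type_is_a(G_OBJECT_TYPE(obj), expected_gtype)) {
        g_debug("object of type %s is not a %s",
                G_OBJECT_TYPE_NAME(obj),
                g_type_name(expected_gtype));
        return FALSE;
    }
    return TRUE;
}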
|
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). These are therefore not ordinary assertions that indicate
a bug in libnm, and they are not armed unless explicitly requested. In our
CI we should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify::devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify::devices", and finally "device-added"
(the first sketch after this list relies on this order).
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden by NMClient's
nm_client_get_connections() and its "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved (see the
last sketch after this list).
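A minimal sketch (not part of this patch) of what the batching and ordering
described above mean for a user of the library. It only uses public API
(nm_client_new(), the "device-added" signal, nm_client_get_devices()) and can
be run with LIBNM_CLIENT_DEBUG=trace to see the debug logging mentioned above;
build e.g. with `gcc demo.c $(pkg-config --cflags --libs libnm)`.
```
/* demo.c: observe "device-added" on a fully consistent cache. */
#include <NetworkManager.h>

static void
on_device_added(NMClient *client, NMDevice *device, gpointer user_data)
{
    /* By the time any signal is emitted, the whole change-set has already
     * been applied to the cache ("device-removed" first, then
     * "notify::devices", then "device-added"), so the device list we query
     * here is consistent with this event. */
    g_print("device-added: %s (cache now has %u devices)\n",
            nm_device_get_iface(device),
            nm_client_get_devices(client)->len);
}

int
main(void)
{
    GError   *error  = NULL;
    NMClient *client = nm_client_new(NULL, &error);

    if (!client)
        g_error("failed to create NMClient: %s", error->message);

    g_signal_connect(client, "device-added", G_CALLBACK(on_device_added), NULL);
    g_main_loop_run(g_main_loop_new(NULL, FALSE));
    return 0;
}
```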
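For the compile-time checks, here is a hypothetical, simplified illustration
of the technique (this is not the actual NML_DBUS_META_PROPERTY_INIT_U()
definition; all names below are made up): a static initializer that fails to
build when the C field bound to a D-Bus "u" property is not a guint32.
```
#include <glib.h>
#include <stddef.h>

typedef struct {
    const char *dbus_name;
    gsize       offset;
} DemoPropMeta;

/* The sizeof(char[...]) term is 0 when the field has the expected type and
 * a negative-size array (a compile error) otherwise. */
#define DEMO_PROP_META_INIT_U(name, ctype, field)                        \
    {                                                                    \
        .dbus_name = (name),                                             \
        .offset    = offsetof(ctype, field)                              \
                     + 0 * sizeof(char[__builtin_types_compatible_p(     \
                                       typeof(((ctype *) 0)->field),     \
                                       guint32) ? 1 : -1]),              \
    }

typedef struct {
    guint32 version_id;
    char   *hostname;
} DemoData;

const DemoPropMeta demo_props[] = {
    DEMO_PROP_META_INIT_U("VersionId", DemoData, version_id),
    /* DEMO_PROP_META_INIT_U("Hostname", DemoData, hostname)
     * would not compile: the field is not a guint32. */
};
```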
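And a minimal sketch (again public API only, not part of this patch)
contrasting the filtered nm_client_get_connections() list with the per-device
available connections, which may still contain hidden profiles:
```
#include <NetworkManager.h>

int
main(void)
{
    GError          *error  = NULL;
    NMClient        *client = nm_client_new(NULL, &error);
    const GPtrArray *connections;
    const GPtrArray *devices;
    guint            i;

    if (!client)
        g_error("failed to create NMClient: %s", error->message);

    /* Profiles hidden via "connection.permissions" do not show up here... */
    connections = nm_client_get_connections(client);
    g_print("visible profiles: %u\n", connections->len);

    /* ...but a device's available connections may still include them. */
    devices = nm_client_get_devices(client);
    for (i = 0; i < devices->len; i++) {
        NMDevice *device = devices->pdata[i];

        g_print("%s: %u available connections\n",
                nm_device_get_iface(device),
                nm_device_get_available_connections(device)->len);
    }

    g_object_unref(client);
    return 0;
}
```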
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    if (!is_ready) {
        is_ready = vtable->is_always_ready;
        goto done;
    }
    nmobj = pr_o->obj_watcher->dbobj->nmobj;

done:
    changed_ready = FALSE;

    if (!pr_o->is_ready && is_ready) {
        pr_o->is_ready = TRUE;
        changed_ready  = TRUE;
    }

    if (pr_o->nmobj != nmobj) {
        pr_o->nmobj = nmobj;
        _nm_client_queue_notify_object(
            self,
            pr_o->owner_dbobj->nmobj,
            pr_o->meta_iface->obj_properties
                [pr_o->meta_iface->dbus_properties[pr_o->dbus_property_idx].obj_properties_idx]);
    }

    if (changed_ready && pr_o->owner_dbobj->obj_state == NML_DBUS_OBJ_STATE_WITH_NMOBJ_NOT_READY)
        nml_dbus_object_obj_changed_link(self, pr_o->owner_dbobj, NML_DBUS_OBJ_CHANGED_TYPE_NMOBJ);
}

/* Notify for an array of object-valued properties in one go. */
void
nml_dbus_property_o_notify_changed_many(NMLDBusPropertyO *ptr, guint len, NMClient *self)
{
    while (len-- > 0)
        nml_dbus_property_o_notify_changed(ptr++, self);
}

static void
nml_dbus_property_o_notify_watch_cb(NMClient *self, gpointer obj_watcher)
{
    PropertyOData *pr_o_data = obj_watcher;
    NMLDBusPropertyO *pr_o = pr_o_data->pr_o;

    nm_assert(pr_o->obj_watcher == obj_watcher);

    if (!pr_o->block_is_changed && !pr_o->is_changed) {
        pr_o->is_changed = TRUE;
        nml_dbus_object_obj_changed_link(self, pr_o->owner_dbobj, NML_DBUS_OBJ_CHANGED_TYPE_NMOBJ);
    }
}

NMLDBusNotifyUpdatePropFlags
nml_dbus_property_o_notify(NMClient *self,
                           NMLDBusPropertyO *pr_o,
                           NMLDBusObject *dbobj,
                           const NMLDBusMetaIface *meta_iface,
                           guint dbus_property_idx,
                           GVariant *value)
{
    const char *dbus_path = NULL;
    gboolean    changed   = FALSE;
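
    /* On the first notification, bind the property to its owning
     * NMLDBusObject and interface meta data. These bindings are fixed:
     * every later notification must pass the same values, which the
     * asserts below verify. */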
    if (!pr_o->owner_dbobj) {
        nm_assert(!pr_o->meta_iface);
        nm_assert(pr_o->dbus_property_idx == 0);
        nm_assert(!pr_o->is_ready);
        pr_o->owner_dbobj       = dbobj;
        pr_o->meta_iface        = meta_iface;
        pr_o->dbus_property_idx = dbus_property_idx;
    } else {
        nm_assert(pr_o->owner_dbobj == dbobj);
        nm_assert(pr_o->meta_iface == meta_iface);
        nm_assert(pr_o->dbus_property_idx == dbus_property_idx);
    }
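
    /* The variant carries a D-Bus object path. By D-Bus convention the
     * root path "/" denotes "no object"; nm_dbus_path_not_empty() maps
     * such an unset path to NULL. */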
    if (value)
        dbus_path = nm_dbus_path_not_empty(g_variant_get_string(value, NULL));
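
    /* Drop a watcher that points at a different (or now unset) path,
     * then register a fresh watcher for the current path. Either step
     * marks the property as changed. */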
2019-10-30 11:42:58 +01:00
    /* The current watcher no longer matches the new D-Bus path: drop it. */
    if (pr_o->obj_watcher
        && (!dbus_path || !nm_streq(dbus_path, pr_o->obj_watcher->dbobj->dbus_path->str))) {
        _dbobjs_obj_watcher_unregister(self, g_steal_pointer(&pr_o->obj_watcher));
        changed = TRUE;
    }

    /* Register a watcher for the new path. The allocation is sized for
     * PropertyOData, which embeds the watcher and a back-pointer to pr_o. */
    if (!pr_o->obj_watcher && dbus_path) {
        pr_o->obj_watcher = _dbobjs_obj_watcher_register_r(self,
                                                           nm_ref_string_new(dbus_path),
                                                           nml_dbus_property_o_notify_watch_cb,
                                                           sizeof(PropertyOData));
        ((PropertyOData *) pr_o->obj_watcher)->pr_o = pr_o;
        changed = TRUE;
    }
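The cast to PropertyOData works because _dbobjs_obj_watcher_register_r() returns a
caller-sized allocation whose first member is the watcher itself. A minimal sketch
of what PropertyOData presumably looks like (inferred from this usage and from the
analogous NMLDBusPropertyAOData further below; not quoted from the sources):
```
typedef struct {
    NMLDBusObjWatcher obj_watcher; /* must stay the first member, so the watcher
                                    * pointer doubles as the user-data pointer */
    NMLDBusPropertyO *pr_o;        /* back-pointer, filled in by the caller */
} PropertyOData;
```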
2019-10-30 11:42:58 +01:00
    /* Link the owning object into the changed-list only once; the queued
     * change is emitted later, during the commit step. */
    if (changed && !pr_o->is_changed) {
        pr_o->is_changed = TRUE;
        nml_dbus_object_obj_changed_link(self, dbobj, NML_DBUS_OBJ_CHANGED_TYPE_NMOBJ);
    }
2019-10-30 11:42:58 +01:00
    return NML_DBUS_NOTIFY_UPDATE_PROP_FLAGS_NONE;
}

void
nml_dbus_property_o_clear(NMLDBusPropertyO *pr_o, NMClient *self)
{
    if (pr_o->obj_watcher) {
        nm_assert(NM_IS_CLIENT(self));
        _dbobjs_obj_watcher_unregister(self, g_steal_pointer(&pr_o->obj_watcher));
    }

    /* If the property currently exposes an object, queue a notification for
     * the corresponding GObject property, which is about to become NULL. */
    if (pr_o->nmobj && pr_o->owner_dbobj && pr_o->owner_dbobj->nmobj) {
        _nm_client_queue_notify_object(
            self,
            pr_o->owner_dbobj->nmobj,
            pr_o->meta_iface->obj_properties
                [pr_o->meta_iface->dbus_properties[pr_o->dbus_property_idx].obj_properties_idx]);
    }

    pr_o->owner_dbobj       = NULL;
    pr_o->meta_iface        = NULL;
    pr_o->dbus_property_idx = 0;
2019-11-26 12:31:23 +01:00
    pr_o->is_ready = FALSE;
libnm: fix dangling pointer in "o" properties when unregistering NMObject

When NMClient gets destroyed, it unrefs all NMObject instances. We then
need to break reference cycles, and the property getters must return
NULL. In particular, for "o" type properties (NMLDBusPropertyO), this
was not done correctly. For example, calling
nm_device_get_active_connection() while/after destroying the NMClient
could return a dangling pointer and trigger an assertion failure. This
is also covered by test_activate_virtual(). A similar issue can probably
happen when a D-Bus object gets removed (without destroying NMClient
altogether).

The fix is that nml_dbus_property_o_clear() needs to clear "nmobj". That
is correct, because the pointer is no longer valid and should not be
there. The unit test shows that a pointer is in fact left behind, and
clearing it fixes the issue.

This differs from an earlier attempt to fix the problem (in commit 62b2aa85e875
('Revert "libnm: fix dangling pointer in public API while destructing NMClient"')),
where clearing the pointer at a different place broke things. That
attempt was wrong because nml_dbus_property_o_notify_changed() needs to
be the one that sets/clears the nmobj field during a regular update. But
the case here is not a regular update: nml_dbus_property_o_clear()
happens during unregister/cleanup, and then we need to clear the
pointer.

Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')

https://bugzilla.redhat.com/show_bug.cgi?id=2039331
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/issues/896
See-also: https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/merge_requests/1064
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/merge_requests/1075
2022-01-28 08:24:36 +01:00
    pr_o->nmobj = NULL;
2019-10-30 11:42:58 +01:00
}
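To illustrate the dangling-pointer hazard that nml_dbus_property_o_clear() now
guards against, here is a minimal sketch of the failing pattern (hypothetical
caller code: it assumes a fully initialized NMClient "client", keeps an extra
reference on one of its devices, and the interface name "eth0" is only an
example):
```
NMDevice           *device;
NMActiveConnection *ac;

device = g_object_ref(nm_client_get_device_by_iface(client, "eth0"));

ac = nm_device_get_active_connection(device); /* valid while the client lives */

g_object_unref(client); /* destroys the cache and unregisters the NMObjects */

/* Before the fix, this could return a dangling pointer into freed data;
 * with nml_dbus_property_o_clear() clearing "nmobj", it returns NULL. */
ac = nm_device_get_active_connection(device);

g_object_unref(device);
```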
void
nml_dbus_property_o_clear_many(NMLDBusPropertyO *pr_o, guint len, NMClient *self)
{
    while (len-- > 0)
        nml_dbus_property_o_clear(pr_o++, self);
}
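nml_dbus_property_o_clear_many() clears a contiguous array of "o" properties in
one call. As a usage sketch (the struct and field names here are hypothetical,
not taken from these sources):
```
typedef struct {
    NMLDBusPropertyO props_o[2]; /* e.g. one slot per object-valued property */
} ExamplePrivate;

static void
example_unregister(ExamplePrivate *priv, NMClient *client)
{
    /* Unregisters each watcher and queues a property-changed notification
     * wherever an exposed object goes away. */
    nml_dbus_property_o_clear_many(priv->props_o, G_N_ELEMENTS(priv->props_o), client);
}
```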
/*****************************************************************************/

typedef struct _NMLDBusPropertyAOData {
    NMLDBusObjWatcher  obj_watcher;
2021-11-09 13:28:54 +01:00
    NMLDBusPropertyAO *parent;
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
typedef struct _NMLDBusPropertyAOData {
    /* (earlier members not shown in this excerpt) */
    CList    data_lst; /* entry in the property's data_lst_head list */
    GObject *nmobj;    /* the GObject instance the D-Bus object path resolved to */
    struct _NMLDBusPropertyAOData *changed_next; /* singly linked list of changed entries */
    bool is_ready : 1;
    bool is_notified : 1;
    bool is_changed : 1;
    bool block_is_changed : 1;
} PropertyAOData;

/* Consistency check for an array-of-objects property. Only compiled in at
 * high assertion levels (NM_MORE_ASSERTS > 10). */
static void
_ASSERT_pr_ao(NMLDBusPropertyAO *pr_ao)
{
    nm_assert(pr_ao);

#if NM_MORE_ASSERTS > 10
    if (pr_ao->owner_dbobj) {
        guint           n_not_ready  = 0;
        guint           n_is_changed = 0;
        guint           n_is_changed_2;
        PropertyAOData *pr_ao_data;
        /* Count the flags over the full entry list... */
        c_list_for_each_entry (pr_ao_data, &pr_ao->data_lst_head, data_lst) {
            if (pr_ao_data->is_changed)
                n_is_changed++;
            if (!pr_ao_data->is_ready)
                n_not_ready++;
        }
        nm_assert(n_not_ready == pr_ao->n_not_ready);
        /* ...and verify that the "changed" list agrees with the per-entry flags. */
        n_is_changed_2 = 0;
        pr_ao_data     = pr_ao->changed_head;
        while (pr_ao_data) {
            nm_assert(pr_ao_data->is_changed);
            n_is_changed_2++;
            pr_ao_data = pr_ao_data->changed_next;
        }
        nm_assert(n_is_changed == n_is_changed_2);
    }
#endif
}

static gboolean
nml_dbus_property_ao_notify_changed_ao(PropertyAOData *pr_ao_data,
                                       NMClient       *self,
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from the client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what the NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should all be for the better, but the
goal is not to break any existing clients. It does change internal
(albeit externally visible) API, like dropping the NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Previously we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data are static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them, because mismatches simply won't compile. (An illustrative
sketch of this kind of check follows after this list.)
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). These are therefore not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should from now on always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify:devices", and finally "device-added".
This changes behavior relative to commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and from its "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved. (See the
sketch after this list.)
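For the GMainContext point above: a minimal sketch (not part of the original patch; worker_thread() and client_ready_cb() are illustrative names) of driving an NMClient from a worker thread that owns its own main context. Error handling is shortened.
```c
#include <NetworkManager.h>

static void
client_ready_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GMainLoop *loop  = user_data;
    GError    *error = NULL;
    NMClient  *client;

    client = nm_client_new_finish(result, &error);
    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_clear_error(&error);
    } else {
        /* Use the client here. All its signals and async callbacks are
         * dispatched while iterating the context that was thread-default
         * when nm_client_new_async() was called. */
        g_object_unref(client);
    }
    g_main_loop_quit(loop);
}

static gpointer
worker_thread(gpointer user_data)
{
    GMainContext *context = g_main_context_new();
    GMainLoop    *loop;

    /* NMClient sticks to the thread-default main context at construction. */
    g_main_context_push_thread_default(context);
    loop = g_main_loop_new(context, FALSE);

    nm_client_new_async(NULL, client_ready_cb, loop);
    g_main_loop_run(loop);

    g_main_context_pop_thread_default(context);
    g_main_loop_unref(loop);
    g_main_context_unref(context);
    return NULL;
}
```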
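For the point about processing changes in separate steps: a hedged sketch of what the consistency guarantee means for a signal handler (client creation omitted; device_added_cb() and watch_devices() are illustrative names).
```c
#include <NetworkManager.h>

/* When "device-added" is emitted, the cache was already updated, so the
 * new device is part of nm_client_get_devices(). */
static void
device_added_cb(NMClient *client, NMDevice *device, gpointer user_data)
{
    const GPtrArray *devices = nm_client_get_devices(client);
    gboolean         found   = FALSE;
    guint            i;

    for (i = 0; i < devices->len; i++) {
        if (devices->pdata[i] == (gpointer) device)
            found = TRUE;
    }
    g_assert(found);
    g_print("device added: %s\n", nm_device_get_iface(device));
}

static void
watch_devices(NMClient *client)
{
    g_signal_connect(client, "device-added", G_CALLBACK(device_added_cb), NULL);
}
```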
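For the compile-time checks mentioned above: an illustrative sketch of the general pattern only, not the actual NML_DBUS_META_PROPERTY_INIT_U() definition. ExampleMetaProperty and the macro are hypothetical; the idea is that C11 _Generic() rejects any field whose C type does not match the D-Bus "u" type.
```c
#include <glib.h>
#include <stddef.h>

typedef struct {
    const char *dbus_type;
    gsize       offset;
} ExampleMetaProperty;

typedef struct {
    guint32 version_id;
} ExamplePriv;

/* The _Generic() selection has no default case, so a field that is not a
 * guint32 is a compile error. The selected value is 0, leaving the offset
 * unchanged. */
#define EXAMPLE_META_PROPERTY_INIT_U(strct, field)                         \
    {                                                                      \
        .dbus_type = "u",                                                  \
        .offset    = offsetof(strct, field)                                \
                     + 0 * _Generic(((strct *) NULL)->field, guint32 : 0), \
    }

static const ExampleMetaProperty example_prop =
    EXAMPLE_META_PROPERTY_INIT_U(ExamplePriv, version_id);

const char *
example_prop_dbus_type(void)
{
    return example_prop.dbus_type;
}
```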
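For the invisible-profiles point above: a short sketch of the asymmetry, assuming an already-created NMClient (print_active_profiles() is an illustrative name).
```c
#include <NetworkManager.h>

static void
print_active_profiles(NMClient *client)
{
    const GPtrArray *acs = nm_client_get_active_connections(client);
    guint            i;

    for (i = 0; i < acs->len; i++) {
        NMActiveConnection *ac = acs->pdata[i];
        NMRemoteConnection *rc = nm_active_connection_get_connection(ac);

        /* "rc" may be a profile that nm_client_get_connections() does not
         * list because of "connection.permissions". */
        if (rc)
            g_print("active profile: %s\n", nm_connection_get_id(NM_CONNECTION(rc)));
    }
}
```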
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
gboolean is_added /* or else removed */)
{
    NMLDBusPropertyAO            *pr_ao;
    const NMLDBusPropertVTableAO *vtable;

    if (!pr_ao_data->nmobj)
        return FALSE;

    nm_assert(pr_ao_data->is_ready);

    /* Track whether this object was already announced, so that each
     * addition/removal is notified exactly once. */
    if (is_added) {
        if (pr_ao_data->is_notified)
            return FALSE;
        pr_ao_data->is_notified = TRUE;
    } else {
        if (!pr_ao_data->is_notified)
            return FALSE;
        pr_ao_data->is_notified = FALSE;
    }

    pr_ao  = pr_ao_data->parent;
    vtable = pr_ao->meta_iface->dbus_properties[pr_ao->dbus_property_idx].extra.property_vtable_ao;

    if (vtable->notify_changed_ao)
        vtable->notify_changed_ao(pr_ao, self, NM_OBJECT(pr_ao_data->nmobj), is_added);

    return TRUE;
}

/* Lazily builds (and caches) a GPtrArray view of the ready objects. */
const GPtrArray *
nml_dbus_property_ao_get_objs_as_ptrarray(NMLDBusPropertyAO *pr_ao)
{
    if (!pr_ao->arr) {
        PropertyAOData *pr_ao_data;
        gsize           n;
        /* First pass: count the objects, so the array can be allocated
         * with the exact size. */
        n = 0;
        if (pr_ao->owner_dbobj) {
            c_list_for_each_entry (pr_ao_data, &pr_ao->data_lst_head, data_lst) {
                if (pr_ao_data->nmobj)
                    n++;
            }
        }
2019-10-30 11:42:58 +01:00
|
|
|
        pr_ao->arr = g_ptr_array_new_full(n, g_object_unref);
        if (pr_ao->owner_dbobj) {
            /* Take a strong reference on every object that is already
             * created, so the returned array owns its elements. */
            c_list_for_each_entry (pr_ao_data, &pr_ao->data_lst_head, data_lst) {
                if (pr_ao_data->nmobj)
                    g_ptr_array_add(pr_ao->arr, g_object_ref(pr_ao_data->nmobj));
            }
        }
    }
    return pr_ao->arr;
}

gboolean
nml_dbus_property_ao_is_ready(const NMLDBusPropertyAO *pr_ao)
{
    /* Ready once no tracked object is still waiting to be initialized. */
    return pr_ao->n_not_ready == 0;
}

static void
nml_dbus_property_ao_notify_changed(NMLDBusPropertyAO *pr_ao, NMClient *self)
{
    gboolean        changed_prop  = FALSE;
    gboolean        changed_ready = FALSE;
    PropertyAOData *pr_ao_data;
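The getter above builds the GPtrArray lazily on first access and hands out
strong references, so repeated reads are cheap and the returned array owns
its elements independently of later cache updates. A minimal, self-contained
sketch of the same pattern follows; all names in it are hypothetical and it
uses a plain GList instead of the c-list used above (it is not libnm API):
```c
#include <glib-object.h>

typedef struct {
    GPtrArray *arr;  /* lazily built snapshot, NULL until first access */
    GList     *objs; /* tracked objects; entries may be NULL while not ready */
} ObjArrayProp;

/* Build the snapshot on first use; each created object gets an extra ref so
 * the array owns its elements (released via g_object_unref on destroy). */
static const GPtrArray *
obj_array_prop_get(ObjArrayProp *prop)
{
    if (G_UNLIKELY(!prop->arr)) {
        GList *iter;

        prop->arr = g_ptr_array_new_full(g_list_length(prop->objs), g_object_unref);
        for (iter = prop->objs; iter; iter = iter->next) {
            if (iter->data)
                g_ptr_array_add(prop->arr, g_object_ref(iter->data));
        }
    }
    return prop->arr;
}
```
In the real code, c_list_for_each_entry() plays the role of the list walk,
and the cached array presumably gets invalidated whenever the tracked set
changes, so the snapshot is rebuilt at most once per change.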

    nm_assert(NM_IS_CLIENT(self));
    _ASSERT_pr_ao(pr_ao);

    /* Not yet associated with an owner object; nothing to notify. */
    if (!pr_ao->owner_dbobj)
        return;

    if (!pr_ao->is_changed) {
        /* The set of objects did not change. If everything is also ready
         * there is nothing to report; otherwise only the ready state still
         * needs to be re-evaluated below. */
        if (pr_ao->n_not_ready == 0)
            return;
        goto done;
    }
|
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
  the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
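The RSS column above can also be sampled programmatically instead of being
read off `ps aux`; a minimal sketch, where the one-second settle delay is an
assumption:
```
# Sketch for N6: start `nmcli monitor`, let the client cache populate,
# then print the resident set size (in kB) of that process.
nmcli monitor >/dev/null &
pid=$!
sleep 1                  # settle time; an assumption, adjust as needed
ps -o rss= -p "$pid"     # RSS in kilobytes
kill "$pid"
```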
2019-10-30 11:42:58 +01:00 |     pr_ao->is_changed = FALSE;
2020-09-28 16:03:33 +02:00 |
2019-10-30 11:42:58 +01:00 |     while (pr_ao->changed_head) {
                           |         const NMLDBusPropertVTableAO *vtable;
2021-11-09 13:28:54 +01:00 |         GObject *nmobj = NULL;
2019-10-30 11:42:58 +01:00 |         gboolean is_ready = TRUE;
                           |         GType gtype;
2020-09-28 16:03:33 +02:00 |
2019-10-30 11:42:58 +01:00 |         pr_ao_data = g_steal_pointer(&pr_ao->changed_head);
                           |         nm_assert(pr_ao_data->is_changed);
2020-09-28 16:03:33 +02:00 |
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run:
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle; of course, that is a separate issue. Here we just check how
long a plain `nmcli` invocation takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
pr_ao->changed_head = pr_ao_data->changed_next;
pr_ao_data->is_changed = FALSE;
2019-10-30 11:42:58 +01:00
if (!pr_ao_data->obj_watcher.dbobj->nmobj) {
    if (pr_ao_data->obj_watcher.dbobj->obj_state >= NML_DBUS_OBJ_STATE_ON_DBUS) {
        NML_NMCLIENT_LOG_W(
            self,
            "[%s]: property %s references %s but object is not created",
            pr_ao->owner_dbobj->dbus_path->str,
            pr_ao->meta_iface->dbus_properties[pr_ao->dbus_property_idx].dbus_property_name,
            pr_ao_data->obj_watcher.dbobj->dbus_path->str);
    } else {
        NML_NMCLIENT_LOG_E(
            self,
            "[%s]: property %s references %s but object is not present on D-Bus",
            pr_ao->owner_dbobj->dbus_path->str,
            pr_ao->meta_iface->dbus_properties[pr_ao->dbus_property_idx].dbus_property_name,
            pr_ao_data->obj_watcher.dbobj->dbus_path->str);
    }
    goto done_pr_ao_data;
}
2019-10-30 11:42:58 +01:00
vtable = pr_ao->meta_iface->dbus_properties[pr_ao->dbus_property_idx].extra.property_vtable_ao;
2019-10-30 11:42:58 +01:00
gtype = vtable->get_o_type_fcn();
if (!g_type_is_a(G_OBJECT_TYPE(pr_ao_data->obj_watcher.dbobj->nmobj), gtype)) {
    NML_NMCLIENT_LOG_E(
        self,
        "[%s]: property %s references %s with unexpected GObject type %s instead of %s",
        pr_ao->owner_dbobj->dbus_path->str,
        pr_ao->meta_iface->dbus_properties[pr_ao->dbus_property_idx].dbus_property_name,
        pr_ao_data->obj_watcher.dbobj->dbus_path->str,
        G_OBJECT_TYPE_NAME(pr_ao_data->obj_watcher.dbobj->nmobj),
        g_type_name(gtype));
    goto done_pr_ao_data;
}
|
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Previously, we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other metadata are static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them, because misuse won't compile.
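As a toy illustration of such compile-time checks (this is not the actual NML_DBUS_META_PROPERTY_INIT_U() definition), an initializer macro can refuse to compile for a mismatched field:
```
#include <glib.h>

/* Fails to compile unless @field has the size of a guint32; the real
 * macros check the expected D-Bus property type. */
#define EXAMPLE_PROPERTY_INIT_U(struct_type, field)                       \
    (G_STATIC_ASSERT_EXPR(sizeof(((struct_type *) NULL)->field)           \
                          == sizeof(guint32)),                            \
     G_STRUCT_OFFSET(struct_type, field))

typedef struct {
    guint32 version;
} ExampleState;

static gsize
example_version_offset(void)
{
    return EXAMPLE_PROPERTY_INIT_U(ExampleState, version);
}
```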
- The metadata now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type; it didn't know the expected D-Bus type.
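A minimal sketch of this kind of type check (an assumed helper, not the actual code path):
```
#include <glib.h>

/* Accept a property value only if its D-Bus type matches the expected
 * signature (e.g. "u" for a D-Bus uint32 property). */
static gboolean
example_accept_property(GVariant *value, const char *expected_type)
{
    return g_variant_is_of_type(value, G_VARIANT_TYPE(expected_type));
}
```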
- Previously, GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Hence these are not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
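For example, a run that arms these checks could look like `LIBNM_CLIENT_DEBUG=warning,error G_DEBUG=fatal-warnings nmcli device`, with nmcli standing in here for any libnm client.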
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify:devices", and finally "device-added"
(see the sketch below).
This changes behavior relative to commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
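A sketch of observing that order from a client (the handler names are illustrative, not from the patch):
```
#include <NetworkManager.h>

static void
on_device_added(NMClient *client, NMDevice *device, gpointer user_data)
{
    /* With the new order, "notify::devices" was already emitted, so the
     * device is already contained in nm_client_get_devices() here. */
    g_print("added: %s\n", nm_device_get_iface(device));
}

static void
on_device_removed(NMClient *client, NMDevice *device, gpointer user_data)
{
    /* Emitted first, before "notify::devices". */
    g_print("removed: %s\n", nm_device_get_iface(device));
}

static void
watch_device_signals(NMClient *client)
{
    g_signal_connect(client, "device-added", G_CALLBACK(on_device_added), NULL);
    g_signal_connect(client, "device-removed", G_CALLBACK(on_device_removed), NULL);
}
```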
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden by NMClient's
nm_client_get_connections() and by the "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
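A minimal sketch of what a client sees (an illustrative helper, not from the patch):
```
#include <NetworkManager.h>

/* List the profiles NMClient exposes. Profiles hidden via
 * "connection.permissions" do not appear here, although they may still be
 * reachable via nm_active_connection_get_connection(). */
static void
list_visible_connections(NMClient *client)
{
    const GPtrArray *connections = nm_client_get_connections(client);
    guint            i;

    for (i = 0; i < connections->len; i++) {
        NMRemoteConnection *remote = connections->pdata[i];

        g_print("%s\n", nm_connection_get_id(NM_CONNECTION(remote)));
    }
}
```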
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long a plain `nmcli` invocation takes on such a system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    if (pr_ao_data->obj_watcher.dbobj == pr_ao->owner_dbobj) {
        /* The property references its own owner object; warn and resolve
         * the reference to the owner's nmobj. */
        NML_NMCLIENT_LOG_W(
            self,
            "[%s]: property %s references itself",
            pr_ao->owner_dbobj->dbus_path->str,
            pr_ao->meta_iface->dbus_properties[pr_ao->dbus_property_idx].dbus_property_name);
        nmobj = pr_ao->owner_dbobj->nmobj;
        goto done_pr_ao_data;
    }
    /* Presumably guards against re-entrancy: mark the block as changed
     * while re-checking the watched object's readiness. */
    pr_ao_data->block_is_changed = TRUE;
    is_ready = _dbobjs_check_dbobj_ready(self, pr_ao_data->obj_watcher.dbobj);
    pr_ao_data->block_is_changed = FALSE;
    if (!is_ready) {
        /* The watched object is not ready; fall back to the vtable's
         * static readiness flag. */
        is_ready = vtable->is_always_ready;
        goto done_pr_ao_data;
    }
    if (vtable->check_nmobj_visible_fcn
        && !vtable->check_nmobj_visible_fcn(pr_ao_data->obj_watcher.dbobj->nmobj)) {
        /* Objects filtered out by the visibility check do not block
         * readiness. */
        is_ready = TRUE;
        goto done_pr_ao_data;
    }
        nmobj = pr_ao_data->obj_watcher.dbobj->nmobj;

done_pr_ao_data:
        /* This entry just became ready; it no longer counts against the
         * owner's not-ready total. */
        if (!pr_ao_data->is_ready && is_ready) {
            nm_assert(pr_ao->n_not_ready > 0);
            pr_ao->n_not_ready--;
            pr_ao_data->is_ready = TRUE;
            changed_ready        = TRUE;
        }

        if (pr_ao_data->nmobj != nmobj) {
            if (nml_dbus_property_ao_notify_changed_ao(pr_ao_data, self, FALSE))
                changed_prop = TRUE;
            pr_ao_data->nmobj = nmobj;
        }

        if (!pr_ao_data->is_notified) {
            if (nml_dbus_property_ao_notify_changed_ao(pr_ao_data, self, TRUE))
                changed_prop = TRUE;
        }
    }

    _ASSERT_pr_ao(pr_ao);

done:
    if (changed_prop) {
        /* The cached object array is stale; drop it and queue a property
         * notification on the owning NMObject. */
        nm_clear_pointer(&pr_ao->arr, g_ptr_array_unref);
        _nm_client_queue_notify_object(
            self,
            pr_ao->owner_dbobj->nmobj,
            pr_ao->meta_iface->obj_properties
                [pr_ao->meta_iface->dbus_properties[pr_ao->dbus_property_idx].obj_properties_idx]);
    }

    /* The last not-ready entry became ready: link the owner so it can be
     * marked ready during the commit phase. */
    if (changed_ready && pr_ao->n_not_ready == 0
        && pr_ao->owner_dbobj->obj_state == NML_DBUS_OBJ_STATE_WITH_NMOBJ_NOT_READY)
        nml_dbus_object_obj_changed_link(self, pr_ao->owner_dbobj, NML_DBUS_OBJ_CHANGED_TYPE_NMOBJ);
}
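The function above tracks readiness with a countdown: each watcher entry that
becomes ready decrements the owner's `n_not_ready` counter exactly once, and
only when the counter reaches zero does the owning object qualify as ready.
Below is a minimal, self-contained sketch of that countdown pattern; the names
are hypothetical and this is not libnm API:
```c
/* Sketch of the readiness countdown (hypothetical names, not libnm API). */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct {
    unsigned n_not_ready; /* entries that still block readiness */
} Owner;

typedef struct {
    bool is_ready;
} Entry;

static void
entry_set_ready(Owner *owner, Entry *entry)
{
    if (entry->is_ready)
        return; /* each entry may decrement the counter at most once */
    assert(owner->n_not_ready > 0);
    entry->is_ready = true;
    if (--owner->n_not_ready == 0)
        printf("owner is now ready\n");
}

int
main(void)
{
    Owner owner = {.n_not_ready = 2};
    Entry a = {0}, b = {0};

    entry_set_ready(&owner, &a);
    entry_set_ready(&owner, &a); /* no-op: already counted */
    entry_set_ready(&owner, &b); /* counter hits zero, owner is ready */
    return 0;
}
```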
void
nml_dbus_property_ao_notify_changed_many(NMLDBusPropertyAO *ptr, guint len, NMClient *self)
{
    while (len-- > 0)
        nml_dbus_property_ao_notify_changed(ptr++, self);
}

static void
nml_dbus_property_ao_notify_watch_cb(NMClient *self, gpointer obj_watcher)
{
    PropertyAOData *pr_ao_data = obj_watcher;
    NMLDBusPropertyAO *pr_ao = pr_ao_data->parent;

    nm_assert(g_hash_table_lookup(pr_ao->hash, pr_ao_data) == pr_ao_data);

    if (!pr_ao_data->block_is_changed && !pr_ao_data->is_changed) {
        /* Push the entry onto the property's intrusive changed-list. Only
         * the first change links the owning object into the client's queue
         * of changed objects, so later commits handle it in one batch. */
        pr_ao_data->is_changed   = TRUE;
        pr_ao_data->changed_next = pr_ao->changed_head;
        pr_ao->changed_head      = pr_ao_data;
        if (!pr_ao->is_changed) {
            pr_ao->is_changed = TRUE;
            nml_dbus_object_obj_changed_link(self,
                                             pr_ao->owner_dbobj,
                                             NML_DBUS_OBJ_CHANGED_TYPE_NMOBJ);
        }
    }

    _ASSERT_pr_ao(pr_ao);
}
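The callback above is the other half of the change tracking: an entry marks
itself changed at most once, is pushed onto the property's intrusive
`changed_head` list, and only the first change links the owning object into
the client's queue; the collected changes are then emitted in one batch. A
small, runnable sketch of that collect-then-commit pattern, with hypothetical
names (not libnm API):
```c
/* Sketch of the intrusive changed-list (hypothetical names, not libnm API). */
#include <stdbool.h>
#include <stdio.h>

typedef struct Entry {
    const char   *name;
    bool          is_changed;
    struct Entry *changed_next; /* intrusive link, only valid while queued */
} Entry;

static Entry *changed_head;

static void
entry_mark_changed(Entry *e)
{
    if (e->is_changed)
        return; /* already queued: the list never holds duplicates */
    e->is_changed   = true;
    e->changed_next = changed_head;
    changed_head    = e;
}

static void
changes_commit(void)
{
    /* Drain the list only after all state is updated, so every
     * notification observes a consistent snapshot. */
    while (changed_head) {
        Entry *e = changed_head;

        changed_head    = e->changed_next;
        e->is_changed   = false;
        e->changed_next = NULL;
        printf("notify: %s\n", e->name);
    }
}

int
main(void)
{
    Entry a = {.name = "a"}, b = {.name = "b"};

    entry_mark_changed(&a);
    entry_mark_changed(&b);
    entry_mark_changed(&a); /* no-op: already queued */
    changes_commit();       /* emits "notify: b", then "notify: a" */
    return 0;
}
```
Pushing at the head keeps marking O(1); the trade-off is that the sketch
drains entries in reverse marking order, which does not matter when the commit
phase only needs all pending notifications to go out together.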
NMLDBusNotifyUpdatePropFlags
nml_dbus_property_ao_notify(NMClient                *self,
                            NMLDBusPropertyAO       *pr_ao,
                            NMLDBusObject           *dbobj,
                            const NMLDBusMetaIface  *meta_iface,
                            guint                    dbus_property_idx,
                            GVariant                *value)
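/* Aside (hypothetical illustration, not this function's body): a `value` of
 * D-Bus type "ao" (array of object paths) can be unpacked with plain GLib
 * calls, e.g.:
 *
 *     GVariantIter iter;
 *     const char  *path;
 *
 *     g_variant_iter_init(&iter, value);
 *     while (g_variant_iter_next(&iter, "&o", &path))
 *         g_print("%s\n", path);
 */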
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check the RSS size in
`ps aux`.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
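The annotated lines that follow are the PropertyAOData bookkeeping in libnm's
nm-client.c; lines attributed to other commits are elided as /* ... */.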
```
{
    CList           stale_lst_head = C_LIST_INIT(stale_lst_head);
    PropertyAOData *pr_ao_data;
    gboolean        changed_prop = FALSE;
    gboolean        changed_obj  = FALSE;
    /* ... */
    if (!pr_ao->owner_dbobj) {
        nm_assert(!pr_ao->data_lst_head.next);
        nm_assert(!pr_ao->data_lst_head.prev);
        nm_assert(!pr_ao->hash);
        nm_assert(!pr_ao->meta_iface);
        nm_assert(pr_ao->dbus_property_idx == 0);
        nm_assert(pr_ao->n_not_ready == 0);
        nm_assert(!pr_ao->changed_head);
        nm_assert(!pr_ao->is_changed);
        c_list_init(&pr_ao->data_lst_head);
        pr_ao->hash              = g_hash_table_new(nm_ppdirect_hash, nm_ppdirect_equal);
        pr_ao->owner_dbobj       = dbobj;
        pr_ao->meta_iface        = meta_iface;
        pr_ao->dbus_property_idx = dbus_property_idx;
    } else {
        nm_assert(pr_ao->data_lst_head.next);
        nm_assert(pr_ao->data_lst_head.prev);
        nm_assert(pr_ao->hash);
        nm_assert(pr_ao->meta_iface == meta_iface);
        nm_assert(pr_ao->dbus_property_idx == dbus_property_idx);
    }
    /* ... */
    c_list_splice(&stale_lst_head, &pr_ao->data_lst_head);
```
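Judging from the visible lines, the pattern is: for a fresh PropertyAOData,
assert the zero-initialized state and set up its list head, hash table, owner
and meta-interface; for an existing one, assert that the bookkeeping is still
consistent; then splice all tracked entries onto the local stale_lst_head so
that entries still present in the new property value can be re-linked and
whatever remains on the stale list can be recognized as removed.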
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
  "connection.permissions". Such profiles are hidden by NMClient's
  nm_client_get_connections() and omitted from its "connection-added"/
  "connection-removed" signals.
  Note that NMActiveConnection's nm_active_connection_get_connection()
  and NMDevice's nm_device_get_available_connections() still expose such
  hidden NMRemoteConnection instances. This behavior was preserved.
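Since NMClient commits all pending changes to the cache before emitting anything,
a signal handler can safely read any other cached state when it runs. A minimal
sketch of that pattern (plain libnm API; the callback name and the g_print()
output are illustrative only, not part of libnm):
```
#include <NetworkManager.h>

/* Handler for "notify::devices". Because NMClient applies all D-Bus
 * changes before emitting signals, the device list read here is
 * already consistent with every other cached property. */
static void
devices_changed_cb(GObject *obj, GParamSpec *pspec, gpointer user_data)
{
    NMClient        *client  = NM_CLIENT(obj);
    const GPtrArray *devices = nm_client_get_devices(client);
    guint            i;

    for (i = 0; i < devices->len; i++) {
        NMDevice *device = g_ptr_array_index(devices, i);

        g_print("device: %s\n", nm_device_get_iface(device));
    }
}

/* usage:
 *   g_signal_connect(client, "notify::devices",
 *                    G_CALLBACK(devices_changed_cb), NULL);
 */
```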
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
  $ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. (There currently seems to be an issue
    on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
    in these tests it had large, bogus ELF notes. Anyway, the point
    is to show the relative sizes, so it doesn't matter.)
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
    of profiles (551 profiles and 515 devices, mostly unrealized). This
    setup is already at the edge of what NetworkManager can currently
    handle. Of course, that is a different issue. Here we just check how
    long a plain `nmcli` invocation takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
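The source fragment below iterates a D-Bus "ao" (array of object paths) value.
The "&o" format makes g_variant_iter_next() return each path as a pointer
borrowed from the GVariant, so nothing is copied and the string must not
outlive the variant. A self-contained illustration of that borrowing pattern
(plain GLib; the object paths are made up):
```
#include <glib.h>

int
main(void)
{
    GVariantBuilder builder;
    GVariant       *value;
    GVariantIter    iter;
    const char     *path;

    /* Build an "ao" value with two example paths. */
    g_variant_builder_init(&builder, G_VARIANT_TYPE("ao"));
    g_variant_builder_add(&builder, "o", "/org/example/Obj/1");
    g_variant_builder_add(&builder, "o", "/org/example/Obj/2");
    value = g_variant_ref_sink(g_variant_builder_end(&builder));

    g_variant_iter_init(&iter, value);
    while (g_variant_iter_next(&iter, "&o", &path)) {
        /* "path" is borrowed from "value": do not free it, and do not
         * keep it after "value" is released. */
        g_print("%s\n", path);
    }

    g_variant_unref(value);
    return 0;
}
```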
if (value) {
    GVariantIter iter;
    const char  *path;

    /* "&o" hands out each object path borrowed from "value",
     * without copying. */
    g_variant_iter_init(&iter, value);
    while (g_variant_iter_next(&iter, "&o", &path)) {
        nm_auto_ref_string NMRefString *dbus_path_r = NULL;
        gpointer                        p_dbus_path_1;

        /* These members must stay first in their respective structs,
         * so that pointers can be cast between the types. */
        G_STATIC_ASSERT_EXPR(G_STRUCT_OFFSET(PropertyAOData, obj_watcher) == 0);
        G_STATIC_ASSERT_EXPR(G_STRUCT_OFFSET(NMLDBusObjWatcher, dbobj) == 0);
        G_STATIC_ASSERT_EXPR(G_STRUCT_OFFSET(NMLDBusObject, dbus_path) == 0);

        if (!nm_dbus_path_not_empty(path)) {
            /* should not happen. Anyway, silently skip empty paths. */
            continue;
        }
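The three G_STATIC_ASSERT_EXPR() checks above pin down that obj_watcher, dbobj
and dbus_path each sit at offset 0 of their containing struct, which is what
allows a pointer to one type to be reinterpreted as a pointer to the embedded
one. A stripped-down sketch of that idiom, using hypothetical stand-in types
rather than the libnm internals:
```
#include <glib.h>

/* Hypothetical stand-ins for the libnm-internal structs; the pattern
 * requires the embedded member to be placed first. */
typedef struct {
    const char *dbus_path;
} Obj;

typedef struct {
    Obj obj; /* must stay first */
    int watcher_id;
} Watcher;

G_STATIC_ASSERT(G_STRUCT_OFFSET(Watcher, obj) == 0);

static const char *
obj_get_path(Obj *o)
{
    return o->dbus_path;
}

int
main(void)
{
    Watcher w = {
        .obj        = {.dbus_path = "/org/example/1"},
        .watcher_id = 7,
    };

    /* Because "obj" is at offset 0, a Watcher* is also a valid Obj*;
     * the static assert makes that layout assumption explicit. */
    g_print("%s\n", obj_get_path((Obj *) &w));
    return 0;
}
```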
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what the NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should all be for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Previously we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other metadata is static and immutable. This
avoids copying it around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure that the property types match. It's pretty hard
to misuse them, because misuse won't compile.
- The metadata now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type; it didn't know the expected D-Bus type.
- Previously, GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
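For example, a test program could arm these checks itself before creating the
client (a sketch; in CI one would typically just export the variables instead):
```
#include <NetworkManager.h>

int
main(void)
{
    /* Must happen before libnm is used and before GLib parses G_DEBUG. */
    g_setenv("LIBNM_CLIENT_DEBUG", "warning,error", TRUE);
    g_setenv("G_DEBUG", "fatal-warnings", TRUE);

    /* ... create and exercise an NMClient here; an armed libnm
     * warning now aborts the process and surfaces the bug. */
    return 0;
}
```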
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify:devices", and finally "device-added".
This changes behavior from commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
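A minimal sketch (invented handler names) that logs the emissions, which with
this patch arrive as "device-removed", then "notify::devices", then
"device-added":
```
#include <NetworkManager.h>

static void
on_device_added(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("device-added: %s\n", nm_device_get_iface(device));
}

static void
on_device_removed(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("device-removed: %s\n", nm_device_get_iface(device));
}

static void
on_devices_notify(GObject *object, GParamSpec *pspec, gpointer user_data)
{
    g_print("notify::devices: now %u devices\n",
            nm_client_get_devices(NM_CLIENT(object))->len);
}

static void
watch_devices(NMClient *client)
{
    g_signal_connect(client, NM_CLIENT_DEVICE_ADDED,
                     G_CALLBACK(on_device_added), NULL);
    g_signal_connect(client, NM_CLIENT_DEVICE_REMOVED,
                     G_CALLBACK(on_device_removed), NULL);
    g_signal_connect(client, "notify::" NM_CLIENT_DEVICES,
                     G_CALLBACK(on_devices_notify), NULL);
}
```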
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and from its "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
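For illustration, a sketch (invented function name) that lists exactly the
profiles NMClient exposes; profiles hidden via "connection.permissions" never
appear in this array:
```
#include <NetworkManager.h>

static void
print_visible_connections(NMClient *client)
{
    const GPtrArray *connections = nm_client_get_connections(client);
    guint            i;

    for (i = 0; i < connections->len; i++) {
        NMConnection *connection = NM_CONNECTION(connections->pdata[i]);

        g_print("%s (%s)\n",
                nm_connection_get_id(connection),
                nm_connection_get_uuid(connection));
    }
}
```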
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. (There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.)
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as in N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as in N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
dbus_path_r   = nm_ref_string_new(path);
p_dbus_path_1 = &dbus_path_r;

pr_ao_data = g_hash_table_lookup(pr_ao->hash, &p_dbus_path_1);
if (pr_ao_data) {
    /* With this implementation we cannot track the same path multiple times.
     * Of course, for none of the properties where we use this, the server
     * should expose the same path more than once, so this limitation is fine
     * (maybe even preferable, to drop duplicates from NMClient's API). */
    nm_assert(pr_ao_data->obj_watcher.dbobj->dbus_path == dbus_path_r);
    if (!changed_prop && pr_ao_data->is_notified) {
        /* The order of a notified entry changed. That means we need to signal
         * a change of the property. This detection of a change is not always
         * correct; in particular, we might detect changes when there were
         * none. That's not a serious problem, and fixing it would be expensive
         * to implement. */
        changed_prop = (c_list_first(&stale_lst_head) != &pr_ao_data->data_lst);
    }
    nm_c_list_move_tail(&pr_ao->data_lst_head, &pr_ao_data->data_lst);
} else {
    pr_ao_data = _dbobjs_obj_watcher_register_r(self,
                                                g_steal_pointer(&dbus_path_r),
                                                nml_dbus_property_ao_notify_watch_cb,
                                                sizeof(PropertyAOData));
    pr_ao_data->parent           = pr_ao;
    pr_ao_data->nmobj            = NULL;
    pr_ao_data->changed_next     = NULL;
    pr_ao_data->is_changed       = TRUE;
    pr_ao_data->block_is_changed = FALSE;
    pr_ao_data->is_ready         = FALSE;
    pr_ao_data->is_notified      = FALSE;
    c_list_link_tail(&pr_ao->data_lst_head, &pr_ao_data->data_lst);
    if (!g_hash_table_add(pr_ao->hash, pr_ao_data))
        nm_assert_not_reached();
    nm_assert(pr_ao->n_not_ready < G_MAXUINT);
    pr_ao->n_not_ready++;
}

#if NM_MORE_ASSERTS > 10
{
    nm_auto_ref_string NMRefString *p  = nm_ref_string_new(path);
    gpointer                        pp = &p;

    nm_assert(g_hash_table_lookup(pr_ao->hash, &pp) == pr_ao_data);
}
#endif
}
}
pr_ao->changed_head = NULL;
c_list_for_each_entry (pr_ao_data, &pr_ao->data_lst_head, data_lst) {
    if (pr_ao_data->is_changed) {
        pr_ao_data->changed_next = pr_ao->changed_head;
        pr_ao->changed_head      = pr_ao_data;
        changed_obj              = TRUE;
    }
}
/* nm-client.c: drop the stale entries of an "array of objects" (AO)
 * D-Bus property, unregistering the object watcher of each entry. */
while ((pr_ao_data = c_list_first_entry(&stale_lst_head, PropertyAOData, data_lst))) {
    changed_obj = TRUE;
    c_list_unlink(&pr_ao_data->data_lst);
    if (!g_hash_table_remove(pr_ao->hash, pr_ao_data))
        nm_assert_not_reached();
    if (!pr_ao_data->is_ready) {
        nm_assert(pr_ao->n_not_ready > 0);
        pr_ao->n_not_ready--;
    } else {
        /* the entry was already exposed to the user; removing it
         * changes the visible property value. */
        if (nml_dbus_property_ao_notify_changed_ao(pr_ao_data, self, FALSE))
            changed_prop = TRUE;
    }
    _dbobjs_obj_watcher_unregister(self, pr_ao_data);
}
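/* _ASSERT_pr_ao() below is a debug-only consistency check of the
 * PropertyAOData bookkeeping; like nm_assert(), it is expected to
 * compile away in regular builds. */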
_ASSERT_pr_ao(pr_ao);
if (changed_obj) {
    pr_ao->is_changed = TRUE;
    nml_dbus_object_obj_changed_link(self, dbobj, NML_DBUS_OBJ_CHANGED_TYPE_NMOBJ);
}
if (changed_prop) {
    /* invalidate the cached GPtrArray and queue a GObject "notify"
     * for the property backing this D-Bus array. */
    nm_clear_pointer(&pr_ao->arr, g_ptr_array_unref);
    _nm_client_queue_notify_object(
        self,
        pr_ao->owner_dbobj->nmobj,
        pr_ao->meta_iface->obj_properties
            [pr_ao->meta_iface->dbus_properties[pr_ao->dbus_property_idx].obj_properties_idx]);
}
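As an aside on the loop at the top of this hunk: `while ((pr_ao_data = c_list_first_entry(...)))` is the standard c-list drain idiom. `c_list_first_entry()` returns NULL once the list is empty, so unlinking inside the body makes the loop consume the list entry by entry. Below is a minimal, self-contained sketch of that idiom; it assumes only that the `c-list.h` header from the c-util project is available, and the `Item` type and printed values are illustrative, not part of libnm.
```
/* Hedged sketch of the c-list drain idiom, assuming <c-list.h> from the
 * c-util project; all names here are illustrative. */
#include <stdio.h>
#include <stdlib.h>
#include <c-list.h>

typedef struct {
    int   value;
    CList link;
} Item;

int main(void)
{
    CList head = C_LIST_INIT(head);
    Item *item;
    int   i;

    for (i = 0; i < 3; i++) {
        item = malloc(sizeof(*item));
        item->value = i;
        c_list_link_tail(&head, &item->link);
    }

    /* c_list_first_entry() yields NULL on an empty list, so unlinking
     * each entry guarantees the loop terminates after consuming all. */
    while ((item = c_list_first_entry(&head, Item, link))) {
        c_list_unlink(&item->link);
        printf("draining %d\n", item->value);
        free(item);
    }
    return 0;
}
```
The real loop above has the same shape, except that each drained entry additionally updates the hash table, the ready-counter, and the watcher registration.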
|
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    return NML_DBUS_NOTIFY_UPDATE_PROP_FLAGS_NONE;
}

/* Clear an array-of-object-paths ("ao") property in the client-side cache.
 * Returns TRUE if the property's value changed. */
gboolean
nml_dbus_property_ao_clear(NMLDBusPropertyAO *pr_ao, NMClient *self)
{
    gboolean changed_prop = FALSE;

    _ASSERT_pr_ao(pr_ao);

    if (!pr_ao->owner_dbobj) {
        /* The property is not attached to an owner object; assert that
         * it is in its pristine, all-cleared state. */
        nm_assert(pr_ao->n_not_ready == 0);
        nm_assert((!pr_ao->data_lst_head.next && !pr_ao->data_lst_head.prev)
                  || (pr_ao->data_lst_head.next == pr_ao->data_lst_head.prev));
        nm_assert(!pr_ao->hash);
        nm_assert(!pr_ao->meta_iface);
        nm_assert(pr_ao->dbus_property_idx == 0);
        nm_assert(!pr_ao->is_changed);
    } else {
        PropertyAOData *pr_ao_data;
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify::devices" and
  "device-added" signals. I think it makes the most sense to emit
  "device-removed" first, then "notify::devices", and finally "device-added"
  signals (see the ordering sketch after this list).
  This changes behavior compared to commit 52ae28f6e5bf ('libnm: queue
  added/removed signals and suppress uninitialized notifications'),
  but I don't think that users should actually rely on the order. Still,
  the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
  "connection.permissions". Such profiles are hidden from NMClient's
  nm_client_get_connections() and from its "connection-added"/"connection-removed"
  signals.
  Note that NMActiveConnection's nm_active_connection_get_connection()
  and NMDevice's nm_device_get_available_connections() still expose such
  hidden NMRemoteConnection instances. This behavior was preserved (a short
  enumeration sketch follows after this list).
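As a sketch of what the batched commit means for a caller, using only the
public libnm API (the handler below is hypothetical): by the time a notify
callback runs, the whole batch of changes has already been applied, so reading
related properties from inside the callback sees a consistent cache.
```
#include <NetworkManager.h>

/* Hypothetical handler: reading other properties from here is safe,
 * because NMClient commits the whole batch of D-Bus changes to its
 * cache before emitting any signal. */
static void
on_devices_changed(GObject *obj, GParamSpec *pspec, gpointer user_data)
{
    const GPtrArray *devices = nm_client_get_devices(NM_CLIENT(obj));

    g_print("devices now: %u\n", devices->len);
}

/* connect with:
 *   g_signal_connect(client, "notify::devices",
 *                    G_CALLBACK(on_devices_changed), NULL);
 */
```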
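The real NML_DBUS_META_PROPERTY_INIT_U() is internal to libnm; the stand-alone
sketch below (all names made up) only illustrates the general trick: make the
initializer ill-formed whenever the struct field does not have the size that
the D-Bus "u" signature promises.
```
#include <glib.h>

typedef struct {
    guint32 flags; /* GObject-side storage for a D-Bus "u" property */
} ExampleObject;

/* Illustrative only (not the real libnm macro): the sizeof(char[...])
 * trick yields a negative array size, and thus a compile error, when
 * the field is not 32 bits wide. */
#define EXAMPLE_PROPERTY_INIT_U(type, field) \
    (G_STRUCT_OFFSET(type, field)            \
     + 0 * (glong) sizeof(char[sizeof(((type *) NULL)->field) == sizeof(guint32) ? 1 : -1]))

static const glong example_offset = EXAMPLE_PROPERTY_INIT_U(ExampleObject, flags);
```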
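The kind of check meant here amounts to verifying the GVariant type of incoming
data before it touches a GObject property. A minimal sketch in plain GLib (the
helper is hypothetical, not libnm API):
```
#include <glib.h>

/* Sketch: accept a D-Bus property value only if it has the expected
 * GVariant type; otherwise reject it instead of coercing blindly. */
static gboolean
demarshal_u32(GVariant *value, guint32 *out_val)
{
    if (!g_variant_is_of_type(value, G_VARIANT_TYPE_UINT32))
        return FALSE; /* server sent an unexpected type on D-Bus */
    *out_val = g_variant_get_uint32(value);
    return TRUE;
}
```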
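For reference, a trivial client to exercise these settings might look like the
sketch below (the binary name is hypothetical; the environment variable values
are the ones documented above):
```
#include <NetworkManager.h>

/* Build this as, say, ./debug-client and run it as:
 *   LIBNM_CLIENT_DEBUG=trace ./debug-client
 *   LIBNM_CLIENT_DEBUG=warning,error G_DEBUG=fatal-warnings ./debug-client
 * The latter escalates libnm's consistency messages to
 * g_warning()/g_critical(), which fatal-warnings turns into aborts. */
int
main(void)
{
    GError   *error  = NULL;
    NMClient *client = nm_client_new(NULL, &error); /* synchronous init */

    if (!client) {
        g_printerr("error: %s\n", error->message);
        g_error_free(error);
        return 1;
    }
    g_object_unref(client);
    return 0;
}
```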
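To make the ordering concrete, a client connecting to all three signals (a
sketch; the handler names are made up) sees its callbacks fire in exactly this
sequence when one device disappears and another appears:
```
#include <NetworkManager.h>

static void
on_removed(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("1) device-removed: %s\n", nm_device_get_iface(device));
}

static void
on_notify_devices(GObject *obj, GParamSpec *pspec, gpointer user_data)
{
    g_print("2) notify::devices\n");
}

static void
on_added(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("3) device-added: %s\n", nm_device_get_iface(device));
}

int
main(void)
{
    NMClient *client = nm_client_new(NULL, NULL);

    if (!client)
        return 1;
    g_signal_connect(client, "device-removed", G_CALLBACK(on_removed), NULL);
    g_signal_connect(client, "notify::devices", G_CALLBACK(on_notify_devices), NULL);
    g_signal_connect(client, "device-added", G_CALLBACK(on_added), NULL);
    g_main_loop_run(g_main_loop_new(NULL, FALSE));
    return 0;
}
```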
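A short sketch of the visible-profiles behavior, using only public libnm API
(the helper function is hypothetical): enumerating via
nm_client_get_connections() yields only the profiles visible to the caller.
```
#include <NetworkManager.h>

/* Sketch: profiles hidden via "connection.permissions" never show up
 * in this list, while e.g. nm_active_connection_get_connection() can
 * still hand out such a hidden NMRemoteConnection. */
static void
print_visible_connections(NMClient *client)
{
    const GPtrArray *conns = nm_client_get_connections(client);
    guint            i;

    for (i = 0; i < conns->len; i++) {
        NMRemoteConnection *rc = conns->pdata[i];

        g_print("%s\n", nm_connection_get_id(NM_CONNECTION(rc)));
    }
}
```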
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so the absolute numbers don't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long a plain `nmcli` invocation takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
```
/* nm-client.c: sanity-check the pending array-of-objects ("ao")
 * property state before draining it. */
nm_assert(NM_IS_CLIENT(self));
nm_assert(pr_ao->data_lst_head.next);
nm_assert(pr_ao->data_lst_head.prev);
nm_assert(pr_ao->hash);
nm_assert(pr_ao->meta_iface);
```
```
/* Drain the pending "ao" entries: count down the ones that were still
 * initializing, emit a change notification for the ready ones, then
 * unlink each entry and unregister its object watcher. */
while ((pr_ao_data = c_list_first_entry(&pr_ao->data_lst_head, PropertyAOData, data_lst))) {
    if (!pr_ao_data->is_ready) {
        nm_assert(pr_ao->n_not_ready > 0);
        pr_ao->n_not_ready--;
    } else {
        if (nml_dbus_property_ao_notify_changed_ao(pr_ao_data, self, FALSE))
            changed_prop = TRUE;
    }
    c_list_unlink(&pr_ao_data->data_lst);
    if (!g_hash_table_remove(pr_ao->hash, pr_ao_data))
        nm_assert_not_reached();
    _dbobjs_obj_watcher_unregister(self, pr_ao_data);
}
```
```
/* After the drain loop, no pending entries may remain. */
nm_assert(c_list_is_empty(&pr_ao->data_lst_head));
nm_assert(pr_ao->n_not_ready == 0);
nm_assert(g_hash_table_size(pr_ao->hash) == 0);
```
```
/* If any ready entry reported a change and the owning NMObject exists,
 * queue a notify for the GObject property that corresponds to this
 * D-Bus property. */
if (changed_prop && pr_ao->owner_dbobj->nmobj) {
    _nm_client_queue_notify_object(
        self,
        pr_ao->owner_dbobj->nmobj,
        pr_ao->meta_iface
            ->obj_properties[pr_ao->meta_iface->dbus_properties[pr_ao->dbus_property_idx]
                                 .obj_properties_idx]);
}
```
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receives a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted at a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
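As a sketch of what this means for users (the handler names are made up; this
is not code from the patch), any notification handler may read the rest of
the cache and rely on it being current:
```
#include <NetworkManager.h>

/* Called only while iterating the client's GMainContext. At this point the
 * whole cache was already updated, so reading other properties is safe;
 * they may simply have changed in the same batch. */
static void
on_devices_notify(GObject *object, GParamSpec *pspec, gpointer user_data)
{
    NMClient        *client  = NM_CLIENT(object);
    const GPtrArray *devices = nm_client_get_devices(client);

    g_print("devices now: %u (NMState %d)\n",
            devices->len,
            (int) nm_client_get_state(client));
}

static void
watch_device_list(NMClient *client)
{
    g_signal_connect(client, "notify::devices", G_CALLBACK(on_devices_notify), NULL);
}
```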
- NMDeviceWifi no longer modifies the content of the cache from the client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should all be for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Previously, we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other metadata is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them, because incorrect usage won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes the behavior introduced by commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
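A minimal sketch of a consumer of these signals (function and handler names
are invented for illustration):
```
#include <NetworkManager.h>

static void
on_device_removed(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("removed: %s\n", nm_device_get_iface(device));
}

static void
on_device_added(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("added: %s\n", nm_device_get_iface(device));
}

static void
on_devices_changed(GObject *object, GParamSpec *pspec, gpointer user_data)
{
    g_print("device list changed\n");
}

/* If a device is replaced, these now fire as:
 * "device-removed", then "notify::devices", then "device-added". */
static void
watch_devices(NMClient *client)
{
    g_signal_connect(client, "device-removed", G_CALLBACK(on_device_removed), NULL);
    g_signal_connect(client, "notify::devices", G_CALLBACK(on_devices_changed), NULL);
    g_signal_connect(client, "device-added", G_CALLBACK(on_device_added), NULL);
}
```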
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
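A hedged sketch of that asymmetry (show_connections() is a made-up helper,
not API added by this patch):
```
#include <NetworkManager.h>

static void
show_connections(NMClient *client)
{
    /* Hidden profiles are filtered out here... */
    const GPtrArray    *conns = nm_client_get_connections(client);
    NMActiveConnection *ac    = nm_client_get_primary_connection(client);

    g_print("%u visible profiles\n", conns->len);

    /* ...but an active connection still hands out its NMRemoteConnection,
     * even if that profile is hidden from the list above. */
    if (ac) {
        NMRemoteConnection *rc = nm_active_connection_get_connection(ac);

        if (rc)
            g_print("active profile: %s\n", nm_connection_get_id(NM_CONNECTION(rc)));
    }
}
```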
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor

    nm_assert(c_list_is_empty(&pr_ao->data_lst_head));
    nm_assert(pr_ao->n_not_ready == 0);
    nm_assert(g_hash_table_size(pr_ao->hash) == 0);

    /* Reset the property to its pristine state. */
    nm_clear_pointer(&pr_ao->hash, g_hash_table_unref);
    pr_ao->owner_dbobj        = NULL;
    pr_ao->meta_iface         = NULL;
    pr_ao->dbus_property_idx  = 0;
    pr_ao->data_lst_head.next = NULL;
    pr_ao->data_lst_head.prev = NULL;
    pr_ao->is_changed         = FALSE;
    }
    nm_clear_pointer(&pr_ao->arr, g_ptr_array_unref);

    return changed_prop;
}

void
nml_dbus_property_ao_clear_many(NMLDBusPropertyAO *pr_ao, guint len, NMClient *self)
{
    while (len-- > 0)
        nml_dbus_property_ao_clear(pr_ao++, self);
}
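
/* Hypothetical call site, for illustration only (not code from this file):
 *
 *     NMLDBusPropertyAO props[5];
 *
 *     ...initialize and use props...
 *     nml_dbus_property_ao_clear_many(props, G_N_ELEMENTS(props), self);
 */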
/*****************************************************************************/
NMLDBusNotifyUpdatePropFlags
_nml_dbus_notify_update_prop_ignore(NMClient *self,
                                    NMLDBusObject *dbobj,
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
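(Derived from the numbers above: compared to [2], [3] cuts the nmcli
task-clock from 477.99 msec to 352.36 msec, roughly 26% less, and the
RSS of `nmcli monitor` from 33548 KB to 28692 KB, roughly 14% less.)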
                                    const NMLDBusMetaIface  *meta_iface,
                                    guint                    dbus_property_idx,
                                    GVariant                *value)
{
    return NML_DBUS_NOTIFY_UPDATE_PROP_FLAGS_NONE;
}
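
/*
 * Illustrative sketch, not actual nm-client.c code: every per-property
 * update handler has the same shape as the two real handlers surrounding
 * this comment.  It receives the raw GVariant for one D-Bus property,
 * updates the value cached on the NMLDBusObject, and returns
 * NMLDBusNotifyUpdatePropFlags so that the later commit step knows whether
 * to emit a GObject notification.  The function name and the
 * NML_DBUS_NOTIFY_UPDATE_PROP_FLAGS_NOTIFY flag are assumptions made for
 * the example; only ..._FLAGS_NONE appears in the surrounding code.
 */
#if 0 /* example only, never compiled */
static NMLDBusNotifyUpdatePropFlags
example_notify_update_prop_s(NMClient                *self,
                             NMLDBusObject           *dbobj,
                             const NMLDBusMetaIface  *meta_iface,
                             guint                    dbus_property_idx,
                             GVariant                *value)
{
    const char *s = NULL;

    /* a NULL value means the property was invalidated on D-Bus */
    if (value)
        s = g_variant_get_string(value, NULL);

    /* ... store "s" at the cache location for this property ... */
    (void) self;
    (void) dbobj;
    (void) meta_iface;
    (void) dbus_property_idx;
    (void) s;

    /* request that the commit step emits the property-changed notification */
    return NML_DBUS_NOTIFY_UPDATE_PROP_FLAGS_NOTIFY;
}
#endif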
NMLDBusNotifyUpdatePropFlags
_nml_dbus_notify_update_prop_o(NMClient                *self,
                               NMLDBusObject           *dbobj,
                               const NMLDBusMetaIface  *meta_iface,
                               guint                    dbus_property_idx,
                               GVariant                *value)
{
    const char   *path = NULL;
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check `ps aux` for
    the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
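    A minimal sketch for sampling just the RSS column instead of eyeballing
    the full `ps aux` output (an assumed helper, not part of the original
    measurement):
    ```
    # report the RSS (in KiB) of any running nmcli process
    ps -o rss= -C nmcli
    ```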
    NMRefString **p_property;

    if (value)
        path = g_variant_get_string(value, NULL);

    p_property =
        nml_dbus_object_get_property_location(dbobj,
                                              meta_iface,
                                              &meta_iface->dbus_properties[dbus_property_idx]);

    if (!nm_streq0(nm_ref_string_get_str(*p_property), path)) {
        nm_ref_string_unref(*p_property);
        *p_property = nm_ref_string_new(path);
    }

    return NML_DBUS_NOTIFY_UPDATE_PROP_FLAGS_NOTIFY;
}

/*****************************************************************************/
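/* As the commit message above describes, NMClient first unpacks changed
 * D-Bus properties and only later commits the changes and emits signals.
 * The handler below appears to be part of that unpacking step: it resolves
 * the static NMLDBusMetaProperty meta data for one changed property of an
 * interface and applies the new value to the cached object. */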
static void
_obj_handle_dbus_prop_changes(NMClient *self,
                              NMLDBusObject *dbobj,
                              NMLDBusObjIfaceData *db_iface_data,
                              guint dbus_property_idx,
                              GVariant *value)
{
    const NMLDBusMetaIface *   meta_iface    = db_iface_data->dbus_iface.meta;
    const NMLDBusMetaProperty *meta_property = &meta_iface->dbus_properties[dbus_property_idx];
    gpointer                   p_property;
    const char *               dbus_type_s;
    const GParamSpec *         param_spec;
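    /* Per the commit message above, the meta data explicitly encodes the
     * expected D-Bus type of each property; presumably dbus_type_s and
     * param_spec hold that expected signature and the corresponding GObject
     * property spec, so a value of unexpected type coming from the server
     * can be rejected rather than applied. */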
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
NMLDBusNotifyUpdatePropFlags notify_update_prop_flags;
nm_assert(G_IS_OBJECT(dbobj->nmobj));
/* Reject a value whose D-Bus type does not match the type declared in the
 * meta data; log the mismatch and ignore the value. */
if (value && !g_variant_is_of_type(value, meta_property->dbus_type)) {
    NML_NMCLIENT_LOG_E(self,
                       "[%s] property %s.%s expected of type \"%s\" but is \"%s\". Ignore",
                       dbobj->dbus_path->str,
                       meta_iface->dbus_iface_name,
                       meta_property->dbus_property_name,
                       (const char *) meta_property->dbus_type,
                       (const char *) g_variant_get_type(value));
    value = NULL;
}
if (meta_property->notify_update_prop) {
    notify_update_prop_flags =
        meta_property->notify_update_prop(self, dbobj, meta_iface, dbus_property_idx, value);
    /* NONE: the handler consumed the update; no further notification needed */
    if (notify_update_prop_flags == NML_DBUS_NOTIFY_UPDATE_PROP_FLAGS_NONE)
        return;
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Previously, we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other metadata is static and immutable. This
avoids copying it around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them, because misuse won't compile.
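The idea is roughly the following (a hypothetical macro, not the real
NML_DBUS_META_PROPERTY_INIT_U() definition; a size check stands in for the
stricter checks the real macros perform):
```c
#include <glib.h>

typedef struct {
    guint32 version_id; /* hypothetical example field */
} ExampleState;

/* Hypothetical: the G_STATIC_ASSERT_EXPR() makes initialization with a
 * field of the wrong size fail at compile time. */
#define EXAMPLE_PROPERTY_INIT_U(_struct, _field)                 \
    (G_STATIC_ASSERT_EXPR(sizeof(((_struct *) NULL)->_field)     \
                          == sizeof(guint32)),                   \
     G_STRUCT_OFFSET(_struct, _field))

static gsize
example_offset(void)
{
    /* compiles only because version_id really is 32 bits wide */
    return EXAMPLE_PROPERTY_INIT_U(ExampleState, version_id);
}
```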
- The metadata now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That only matters when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type; it didn't know the expected D-Bus type.
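In other words, the client now verifies the wire type before demarshalling.
A hedged sketch of such a check (not the actual libnm code path):
```c
#include <glib.h>

/* Sketch: accept a "u" (uint32) property only if the server really sent
 * that type; otherwise reject the value instead of misinterpreting it. */
static gboolean
accept_u32_property(GVariant *value, guint32 *out_val)
{
    if (!g_variant_is_of_type(value, G_VARIANT_TYPE_UINT32))
        return FALSE; /* unexpected D-Bus type from the server */
    *out_val = g_variant_get_uint32(value);
    return TRUE;
}
```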
- Previously, GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). They are therefore not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
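For example, any libnm client (nmcli here, purely as an illustration) can be
run with both checks armed:
```
$ LIBNM_CLIENT_DEBUG=warning,error G_DEBUG=fatal-warnings nmcli general status
```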
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to first emit
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior compared to commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden by NMClient: they are
omitted from nm_client_get_connections() and emit no
"connection-added"/"connection-removed" signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
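As a small sketch (the helper name is made up; nm_client_get_connections() is
real API, and an already initialized client is assumed), the profiles visible
through NMClient can be listed like this, and hidden profiles simply don't
appear:
```c
#include <NetworkManager.h>

/* Sketch: enumerate the profiles NMClient exposes. Profiles hidden via
 * "connection.permissions" are not contained in this array. */
static void
print_visible_connections(NMClient *client)
{
    const GPtrArray *conns = nm_client_get_connections(client);
    guint            i;

    for (i = 0; i < conns->len; i++) {
        NMConnection *conn = NM_CONNECTION(conns->pdata[i]);

        g_print("%s (%s)\n",
                nm_connection_get_id(conn),
                nm_connection_get_uuid(conn));
    }
}
```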
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long a plain `nmcli` invocation takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
[blame annotations stripped; recovered source fragments from nm-client.c follow]
nm_assert(notify_update_prop_flags == NML_DBUS_NOTIFY_UPDATE_PROP_FLAGS_NOTIFY);
nm_assert(G_IS_OBJECT(dbobj->nmobj));
nm_assert(meta_iface->obj_properties);
nm_assert(meta_property->obj_properties_idx > 0);
param_spec = meta_iface->obj_properties[meta_property->obj_properties_idx];
goto notify;
}
p_property = nml_dbus_object_get_property_location(dbobj, meta_iface, meta_property);
dbus_type_s = (const char *) meta_property->dbus_type;
nm_assert(G_IS_OBJECT(dbobj->nmobj));
nm_assert(meta_iface->obj_properties);
nm_assert(meta_property->obj_properties_idx > 0);
param_spec = meta_iface->obj_properties[meta_property->obj_properties_idx];
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
    notify_update_prop_flags = NML_DBUS_NOTIFY_UPDATE_PROP_FLAGS_NOTIFY;
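Note that this line only records that a "notify" is due; following the design described in the commit message, the actual GObject notification is deferred to the commit step, once the whole cache is consistent. A minimal sketch of that deferred-notify pattern, again with hypothetical names rather than the real libnm code:
```c
#include <glib-object.h>

/* Hypothetical stand-in for NML_DBUS_NOTIFY_UPDATE_PROP_FLAGS_NOTIFY:
 * the unpack step merely sets a flag recording what changed. */
typedef enum {
    EXAMPLE_NOTIFY_UPDATE_PROP_FLAGS_NONE   = 0,
    EXAMPLE_NOTIFY_UPDATE_PROP_FLAGS_NOTIFY = (1 << 0),
} ExampleNotifyUpdatePropFlags;

static void
example_commit_notify(GObject                      *nmobj,
                      GParamSpec                   *param_spec,
                      ExampleNotifyUpdatePropFlags  flags)
{
    /* Runs only in the commit step, after all GObject instances were
     * updated; never while a D-Bus message is still being unpacked. */
    if (flags & EXAMPLE_NOTIFY_UPDATE_PROP_FLAGS_NOTIFY)
        g_object_notify_by_pspec(nmobj, param_spec);
}
```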
2020-09-28 16:03:33 +02:00  libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
    /* Demarshal the D-Bus value into the C field backing the GObject
     * property. The expected D-Bus signature comes from the static meta
     * data; a NULL value resets the field to its default. */
    switch (dbus_type_s[0]) {
    case 'b':
        nm_assert(dbus_type_s[1] == '\0');
        if (value)
            *((bool *) p_property) = g_variant_get_boolean(value);
        else if (param_spec->value_type == G_TYPE_BOOLEAN)
            *((bool *) p_property) = ((GParamSpecBoolean *) param_spec)->default_value;
        else {
            nm_assert_not_reached();
            *((bool *) p_property) = FALSE;
        }
        break;
    case 'y':
        nm_assert(dbus_type_s[1] == '\0');
        if (value)
            *((guint8 *) p_property) = g_variant_get_byte(value);
        else {
            nm_assert(nm_utils_g_param_spec_is_default(param_spec));
            *((guint8 *) p_property) = 0;
        }
        break;
    case 'q':
        nm_assert(dbus_type_s[1] == '\0');
        if (value)
            *((guint16 *) p_property) = g_variant_get_uint16(value);
        else {
            nm_assert(nm_utils_g_param_spec_is_default(param_spec));
            *((guint16 *) p_property) = 0;
        }
        break;
    case 'i':
        nm_assert(dbus_type_s[1] == '\0');
        if (value)
            *((gint32 *) p_property) = g_variant_get_int32(value);
        else if (param_spec->value_type == G_TYPE_INT)
            *((gint32 *) p_property) = ((GParamSpecInt *) param_spec)->default_value;
        else {
            nm_assert(nm_utils_g_param_spec_is_default(param_spec));
            *((gint32 *) p_property) = 0;
        }
        break;
    case 'u':
        nm_assert(dbus_type_s[1] == '\0');
        if (value)
            *((guint32 *) p_property) = g_variant_get_uint32(value);
        else {
            nm_assert(nm_utils_g_param_spec_is_default(param_spec));
            *((guint32 *) p_property) = 0;
        }
        break;
    case 'x':
        nm_assert(dbus_type_s[1] == '\0');
        if (value)
            *((gint64 *) p_property) = g_variant_get_int64(value);
        else if (param_spec->value_type == G_TYPE_INT64)
            *((gint64 *) p_property) = ((GParamSpecInt64 *) param_spec)->default_value;
        else {
            nm_assert(nm_utils_g_param_spec_is_default(param_spec));
            *((gint64 *) p_property) = 0;
        }
        break;
    case 't':
        nm_assert(dbus_type_s[1] == '\0');
        if (value)
            *((guint64 *) p_property) = g_variant_get_uint64(value);
        else {
            nm_assert(nm_utils_g_param_spec_is_default(param_spec));
            *((guint64 *) p_property) = 0;
        }
        break;
    case 's':
        nm_assert(dbus_type_s[1] == '\0');
        nm_clear_g_free((char **) p_property);
        if (value)
            *((char **) p_property) = g_variant_dup_string(value, NULL);
        else {
            nm_assert(nm_utils_g_param_spec_is_default(param_spec));
            *((char **) p_property) = NULL;
        }
        break;
    case 'o':
        nm_assert(dbus_type_s[1] == '\0');
        notify_update_prop_flags = nml_dbus_property_o_notify(self,
                                                              p_property,
                                                              dbobj,
                                                              meta_iface,
                                                              dbus_property_idx,
                                                              value);
        break;
    case 'a':
        switch (dbus_type_s[1]) {
        case 'y':
            nm_assert(dbus_type_s[2] == '\0');
            {
                gconstpointer v;
                gsize         l;
2021-11-09 13:28:54 +01:00
                GBytes *b = NULL;
2020-09-28 16:03:33 +02:00  libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
                if (value) {
                    v = g_variant_get_fixed_array(value, &l, 1);
2020-09-28 16:03:33 +02:00  libnm: refactor caching of D-Bus objects in NMClient
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
            if (l > 0) {
                /* empty arrays are coerced to NULL. */
                b = g_bytes_new(v, l);
            }
        }
        nm_clear_pointer((GBytes **) p_property, g_bytes_unref);
        *((GBytes **) p_property) = b;
    }
    break;
case 's':
    nm_assert(dbus_type_s[2] == '\0');
    nm_assert(param_spec->value_type == G_TYPE_STRV);
    g_strfreev(*((char ***) p_property));
    if (value)
        *((char ***) p_property) = g_variant_dup_strv(value, NULL);
    else
        *((char ***) p_property) = NULL;
    break;
case 'o':
    nm_assert(dbus_type_s[2] == '\0');
    notify_update_prop_flags = nml_dbus_property_ao_notify(self,
                                                           p_property,
                                                           dbobj,
                                                           meta_iface,
                                                           dbus_property_idx,
                                                           value);
    break;
default:
    nm_assert_not_reached();
}
break;
default:
    nm_assert_not_reached();
}

notify:
    if (NM_FLAGS_HAS(notify_update_prop_flags, NML_DBUS_NOTIFY_UPDATE_PROP_FLAGS_NOTIFY))
        g_object_notify_by_pspec(dbobj->nmobj, (GParamSpec *) param_spec);
}

static void
_obj_handle_dbus_iface_changes(NMClient *self,
                               NMLDBusObject *dbobj,
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me (see the sketch below).
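From a client's perspective, the ordering can be observed like this (a
sketch; the callback and helper names are illustrative, the signal names
are the real NMClient ones):
```
#include <NetworkManager.h>

static void
on_device_removed(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("removed: %s\n", nm_device_get_iface(device));
}

static void
connect_device_signals(NMClient *client)
{
    /* With the new ordering, "device-removed" fires first, then
     * "notify::devices", and finally "device-added". */
    g_signal_connect(client, "device-removed", G_CALLBACK(on_device_removed), NULL);
}
```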
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and from the "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved (see the sketch below).
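For example (a sketch using real libnm API), enumerating visible profiles
goes through nm_client_get_connections(), while a hidden profile backing an
active connection is still reachable via nm_active_connection_get_connection():
```
#include <NetworkManager.h>

/* List the profiles that are visible to the user. Hidden profiles do not
 * appear here, but may still be returned by
 * nm_active_connection_get_connection(). */
static void
dump_visible_connections(NMClient *client)
{
    const GPtrArray *conns = nm_client_get_connections(client);
    guint            i;

    for (i = 0; i < conns->len; i++)
        g_print("%s\n", nm_connection_get_id(NM_CONNECTION(conns->pdata[i])));
}
```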
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
                               NMLDBusObjIfaceData *db_iface_data)
{
    NMLDBusObjPropData *db_prop_data;
    gboolean            is_self = (G_OBJECT(self) == dbobj->nmobj);
    gboolean            is_removed;
    gboolean            type_compatible;
    guint8              i_prop;
    nm_assert(NM_IS_CLIENT(self));
    nm_assert(is_self || NM_IS_OBJECT(dbobj->nmobj));
    /* Check once per interface whether the cached NMObject's GType is
     * compatible with the interface's meta data, and cache the result. */
    if (G_UNLIKELY(!db_iface_data->nmobj_checked)) {
        db_iface_data->nmobj_checked = TRUE;
        type_compatible = db_iface_data->dbus_iface.meta->get_type_fcn
                          && g_type_is_a(G_OBJECT_TYPE(dbobj->nmobj),
                                         db_iface_data->dbus_iface.meta->get_type_fcn());
        db_iface_data->nmobj_compatible = type_compatible;
    } else
        type_compatible = db_iface_data->nmobj_compatible;
    if (!type_compatible) {
        /* On D-Bus, we have this interface associated with the object, but apparently
         * it is not compatible. This is either a bug, or NetworkManager exposed an
         * unexpected interface on a D-Bus object for which we create a certain NMObject
         * type. */
        return;
    }

    is_removed = c_list_is_empty(&db_iface_data->iface_lst);

    nm_assert(is_removed || !c_list_is_empty(&db_iface_data->changed_prop_lst_head));

    _nm_client_queue_notify_object(self, dbobj->nmobj, NULL);

    if (is_removed) {
        for (i_prop = 0; i_prop < db_iface_data->dbus_iface.meta->n_dbus_properties; i_prop++) {
            const GVariantType *dbus_type =
                db_iface_data->dbus_iface.meta->dbus_properties[i_prop].dbus_type;

            /* Unset properties that can potentially contain objects, to release them,
             * but keep the rest around, because it might still make sense to know what
             * they were (e.g. when a device has been removed, we'd like to know what
             * interface name it had, or keep the state to avoid a spurious state change
             * to UNKNOWN). */
            if (g_variant_type_is_array(dbus_type)
                || g_variant_type_equal(dbus_type, G_VARIANT_TYPE_OBJECT_PATH))
                _obj_handle_dbus_prop_changes(self, dbobj, db_iface_data, i_prop, NULL);
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
        }
    } else {
        while ((db_prop_data = c_list_first_entry(&db_iface_data->changed_prop_lst_head,
                                                  NMLDBusObjPropData,
                                                  changed_prop_lst))) {
            gs_unref_variant GVariant *prop_data_value = NULL;

            c_list_unlink(&db_prop_data->changed_prop_lst);

            nm_assert(db_prop_data >= db_iface_data->prop_datas);
            nm_assert(
                db_prop_data
                < &db_iface_data->prop_datas[db_iface_data->dbus_iface.meta->n_dbus_properties]);
            nm_assert(db_prop_data->prop_data_value);

            /* Currently, NMLDBusObject forgets about the variant. Theoretically, it could cache
             * it, but there is no need because we update the property in nmobj (which extracts and
             * keeps the property value itself).
             *
             * Note that we only consume the variant here when we process it.
             * That implies that we already created an NMObject for the dbobj
             * instance. Until that happens, we cache the last seen property values. */
            prop_data_value = g_steal_pointer(&db_prop_data->prop_data_value);
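
            /* db_prop_data points into the flat prop_datas[] array, which parallels
             * the interface's static meta property table (see the bounds asserts
             * above), so the pointer arithmetic below recovers the property index
             * without having to store it separately. */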
            i_prop = (db_prop_data - &db_iface_data->prop_datas[0]);
            _obj_handle_dbus_prop_changes(self, dbobj, db_iface_data, i_prop, prop_data_value);
        }
    }
}

static void
_obj_handle_dbus_changes(NMClient *self, NMLDBusObject *dbobj)
{
    NMClientPrivate     *priv = NM_CLIENT_GET_PRIVATE(self);
    NMLDBusObjIfaceData *db_iface_data;
    NMLDBusObjIfaceData *db_iface_data_safe;
    gs_unref_object GObject *nmobj_unregistering = NULL;

    _ASSERT_dbobj(dbobj, self);

    /* In a first step we only remember all the changes that a D-Bus message brings
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
    the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
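The RSS number can also be read directly instead of scanning the full
`ps aux` output. A minimal sketch using procps (assuming the `nmcli monitor`
process from above is still running):
```
# print PID, resident set size (KiB) and command name of all nmcli processes
ps -C nmcli -o pid,rss,comm
```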
     * and queue the object to process them.
     *
     * Here (in step 2) we look at what changed on D-Bus and propagate those changes
     * to the NMObject instance.
     *
     * Note that here we still must not emit any GObject signals. That follows later,
     * and again if the object changes, we will just queue that we handle the changes
     * later. */

    c_list_for_each_entry_safe (db_iface_data,
                                db_iface_data_safe,
                                &dbobj->iface_lst_head,
                                iface_lst) {
        if (!db_iface_data->iface_removed)
            continue;
        c_list_unlink(&db_iface_data->iface_lst);
        if (db_iface_data->dbus_iface_is_wellknown && dbobj->nmobj)
            _obj_handle_dbus_iface_changes(self, dbobj, db_iface_data);
        nml_dbus_obj_iface_data_destroy(db_iface_data);
    }
    if (G_UNLIKELY(!dbobj->nmobj) && !c_list_is_empty(&dbobj->iface_lst_head)) {
        /* Try to create a NMObject for this D-Bus object. Note that we detect the type
         * based on the interfaces that it has, and if we make a choice once, we don't
         * change. That means, one D-Bus object can only be of one type. */

        if (NM_IN_SET(dbobj->dbus_path,
                      _dbus_path_nm,
                      _dbus_path_settings,
                      _dbus_path_dns_manager)) {
            /* For the main types, we don't detect them based on the interfaces present,
             * but on the path names. Of course, both should correspond anyway. */
            NML_NMCLIENT_LOG_T(self,
                               "[%s]: register NMClient for D-Bus object",
                               dbobj->dbus_path->str);
            dbobj->nmobj = G_OBJECT(self);
            if (dbobj->dbus_path == _dbus_path_nm) {
                nm_assert(!priv->dbobj_nm);
                priv->dbobj_nm = dbobj;
                _set_nm_running(self, TRUE);
            } else if (dbobj->dbus_path == _dbus_path_settings) {
                nm_assert(!priv->dbobj_settings);
                priv->dbobj_settings = dbobj;
            } else {
                nm_assert(dbobj->dbus_path == _dbus_path_dns_manager);
                nm_assert(!priv->dbobj_dns_manager);
                priv->dbobj_dns_manager = dbobj;
            }
            nml_dbus_object_set_obj_state(dbobj, NML_DBUS_OBJ_STATE_WITH_NMOBJ_READY, self);
        } else {
            GType                   gtype     = G_TYPE_NONE;
            NMLDBusMetaInteracePrio curr_prio = NML_DBUS_META_INTERFACE_PRIO_INSTANTIATE_10 - 1;
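
            /* Pick the GType for the NMObject from the interfaces present: the loop
             * below takes the well-known D-Bus interface with the highest
             * interface_prio (and assumes well-known interfaces sort first in
             * iface_lst_head). */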
            c_list_for_each_entry (db_iface_data, &dbobj->iface_lst_head, iface_lst) {
                nm_assert(!db_iface_data->iface_removed);
                if (!db_iface_data->dbus_iface_is_wellknown)
                    break;
                if (db_iface_data->dbus_iface.meta->interface_prio <= curr_prio)
                    continue;
                curr_prio = db_iface_data->dbus_iface.meta->interface_prio;
                gtype     = db_iface_data->dbus_iface.meta->get_type_fcn();
            }
            if (gtype != G_TYPE_NONE) {
                dbobj->nmobj = g_object_new(gtype, NULL);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously, GDBusObjectManager would sometimes emit warnings (g_log()).
Those probably indicated real bugs. In any case, they prevented us from
running CI with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). They are therefore not ordinary assertions that indicate a
bug in libnm, and they are not armed unless explicitly requested. In our CI
we should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify:devices", and finally "device-added"
(a small example of observing this order follows below).
This changes the behavior of commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
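For example, a client that wants to observe this order can connect plain
signal handlers (the signal names below are real libnm API; the handlers
just log):
```
#include <NetworkManager.h>

static void
on_device_removed(NMClient *client, GObject *device, gpointer user_data)
{
    g_print("device-removed: %s\n", nm_device_get_iface(NM_DEVICE(device)));
}

static void
on_devices_notify(GObject *client, GParamSpec *pspec, gpointer user_data)
{
    g_print("notify::devices\n");
}

static void
on_device_added(NMClient *client, GObject *device, gpointer user_data)
{
    g_print("device-added: %s\n", nm_device_get_iface(NM_DEVICE(device)));
}

static void
connect_device_signals(NMClient *client)
{
    g_signal_connect(client, "device-removed", G_CALLBACK(on_device_removed), NULL);
    g_signal_connect(client, "notify::devices", G_CALLBACK(on_devices_notify), NULL);
    g_signal_connect(client, "device-added", G_CALLBACK(on_device_added), NULL);
}
```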
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and from its "connection-added"/"connection-removed"
signals (see the example below).
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
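To make the visible/hidden distinction concrete, the following small
program prints the profiles that NMClient exposes; hidden profiles simply
never show up in this list (real libnm API, minimal error handling; build
against libnm, e.g. via pkg-config):
```
#include <NetworkManager.h>

int
main(void)
{
    GError          *error = NULL;
    NMClient        *client;
    const GPtrArray *conns;
    guint            i;

    /* synchronous initialization; fills the object cache via the
     * GetManagedObjects() reply before returning */
    client = nm_client_new(NULL, &error);
    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    conns = nm_client_get_connections(client);
    for (i = 0; i < conns->len; i++) {
        NMRemoteConnection *rc = conns->pdata[i];

        g_print("%s\n", nm_connection_get_id(NM_CONNECTION(rc)));
    }

    g_object_unref(client);
    return 0;
}
```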
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check the RSS size
in `ps aux`.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
/* trace-log the registration of the newly created NMObject */
NML_NMCLIENT_LOG_T(self,
                   "[%s]: register new NMObject " NM_HASH_OBFUSCATE_PTR_FMT " of type %s",
                   dbobj->dbus_path->str,
                   NM_HASH_OBFUSCATE_PTR(dbobj->nmobj),
                   g_type_name(gtype));
nm_assert(NM_IS_OBJECT(dbobj->nmobj));
/* let the NMObject subclass hook itself up with the client and mark
 * the object as not yet ready */
NM_OBJECT_GET_CLASS(dbobj->nmobj)->register_client(NM_OBJECT(dbobj->nmobj), self, dbobj);
nml_dbus_object_set_obj_state(dbobj, NML_DBUS_OBJ_STATE_WITH_NMOBJ_NOT_READY, self);
        }
    }
}
/* walk this object's D-Bus interfaces and hand any collected property
 * changes over to the NMObject */
c_list_for_each_entry (db_iface_data, &dbobj->iface_lst_head, iface_lst) {
    nm_assert(!db_iface_data->iface_removed);
    if (!db_iface_data->dbus_iface_is_wellknown)
        break;
    if (c_list_is_empty(&db_iface_data->changed_prop_lst_head))
        continue;
    if (dbobj->nmobj)
        _obj_handle_dbus_iface_changes(self, dbobj, db_iface_data);
}
/* the D-Bus object has no interfaces left; if the NMClient itself was
 * registered as its nmobj, unregister it */
if (c_list_is_empty(&dbobj->iface_lst_head) && dbobj->nmobj) {
    if (dbobj->nmobj == G_OBJECT(self)) {
        dbobj->nmobj = NULL;
        NML_NMCLIENT_LOG_T(self,
                           "[%s]: unregister NMClient from D-Bus object",
                           dbobj->dbus_path->str);
        if (dbobj->dbus_path == _dbus_path_nm) {
            nm_assert(priv->dbobj_nm == dbobj);
            priv->dbobj_nm = NULL;
            _set_nm_running(self, TRUE);
                nml_dbus_property_o_clear_many(priv->nm.property_o,
                                               G_N_ELEMENTS(priv->nm.property_o),
                                               self);
                nml_dbus_property_ao_clear_many(priv->nm.property_ao,
                                                G_N_ELEMENTS(priv->nm.property_ao),
                                                self);
            } else if (dbobj->dbus_path == _dbus_path_settings) {
                nm_assert(priv->dbobj_settings == dbobj);
                priv->dbobj_settings = NULL;
                nml_dbus_property_ao_clear(&priv->settings.connections, self);
            } else {
                nm_assert(dbobj->dbus_path == _dbus_path_dns_manager);
                nm_assert(priv->dbobj_dns_manager == dbobj);
                priv->dbobj_dns_manager = NULL;
            }
        } else {
            nmobj_unregistering = g_steal_pointer(&dbobj->nmobj);
            nml_dbus_object_set_obj_state(dbobj, NML_DBUS_OBJ_STATE_WATCHED_ONLY, self);
            NML_NMCLIENT_LOG_T(self,
                               "[%s]: unregister NMObject " NM_HASH_OBFUSCATE_PTR_FMT " of type %s",
                               dbobj->dbus_path->str,
                               NM_HASH_OBFUSCATE_PTR(nmobj_unregistering),
                               g_type_name(G_OBJECT_TYPE(nmobj_unregistering)));
            NM_OBJECT_GET_CLASS(nmobj_unregistering)
                ->unregister_client(NM_OBJECT(nmobj_unregistering), self, dbobj);
        }
    }
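For readers unfamiliar with `g_steal_pointer()` in the else-branch above: it reads a pointer variable and NULLs it in a single step, making the ownership hand-over explicit. A minimal sketch of the idiom, assuming only GLib (the `Holder` type and `holder_clear_nmobj()` are hypothetical names for illustration, not libnm API):
```
#include <glib.h>

typedef struct {
    GObject *nmobj; /* owned reference, or NULL */
} Holder;

static void
holder_clear_nmobj(Holder *h)
{
    GObject *taken;

    /* g_steal_pointer() returns the current value and sets the source
     * field to NULL in one step, so the struct never keeps a stale
     * pointer to an object we are in the middle of releasing. */
    taken = g_steal_pointer(&h->nmobj);
    if (taken)
        g_object_unref(taken);
}
```
In the fragment above the stolen pointer is passed on to the `unregister_client()` class method instead of being unreffed; stealing it first guarantees that `dbobj->nmobj` is already cleared by the time that callback runs.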
    nml_dbus_object_obj_changed_link(self, dbobj, NML_DBUS_OBJ_CHANGED_TYPE_NMOBJ);
}

/*****************************************************************************/

static void
_dbus_handle_obj_changed_nmobj(NMClient *self)
{
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
    NMLDBusObject   *dbobj;
    CList obj_changed_tmp_lst_head = C_LIST_INIT(obj_changed_tmp_lst_head);

    nm_assert(!nml_dbus_object_obj_changed_any_linked(self, ~NML_DBUS_OBJ_CHANGED_TYPE_NMOBJ));

    /* First we notify all watchers that these objects changed. Note that we only do that
     * here for the list before processing the changes below in a loop. That is because
     * processing changes can again enqueue changed objects, and we only want to
     * notify watchers for the events that happened earlier (not repeatedly notify them). */
    c_list_splice(&obj_changed_tmp_lst_head, &priv->obj_changed_lst_head);
    while (
        (dbobj = c_list_first_entry(&obj_changed_tmp_lst_head, NMLDBusObject, obj_changed_lst))) {
        nm_c_list_move_tail(&priv->obj_changed_lst_head, &dbobj->obj_changed_lst);
        _dbobjs_notify_watchers_for_dbobj(self, dbobj);
    }

again:

    nm_assert(!nml_dbus_object_obj_changed_any_linked(self, ~NML_DBUS_OBJ_CHANGED_TYPE_NMOBJ));
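The splice-then-drain idiom above snapshots the pending work so that objects enqueued while notifying are not visited again in the same pass. A minimal self-contained sketch of that idiom, assuming the standalone `c-list.h` header from the c-util project (`Item`, `process()` and `drain_current_backlog()` are hypothetical names, not libnm API):
```
#include <stdio.h>
#include <c-list.h>

typedef struct {
    CList link;
    int   payload;
} Item;

/* Hypothetical handler; it is allowed to enqueue new work on @backlog. */
static void
process(Item *item, CList *backlog)
{
    (void) backlog;
    printf("item %d\n", item->payload);
}

static void
drain_current_backlog(CList *backlog)
{
    CList snapshot = C_LIST_INIT(snapshot);
    Item *item;

    /* Move the whole backlog aside in O(1); c_list_splice() leaves
     * @backlog empty and usable again. Anything process() enqueues
     * afterwards lands on @backlog, not on @snapshot, so this drain
     * only visits items that were queued before it started. */
    c_list_splice(&snapshot, backlog);

    while ((item = c_list_first_entry(&snapshot, Item, link))) {
        c_list_unlink(&item->link);
        process(item, backlog);
    }
}

int
main(void)
{
    CList backlog = C_LIST_INIT(backlog);
    Item  items[3] = {{.payload = 1}, {.payload = 2}, {.payload = 3}};
    int   i;

    for (i = 0; i < 3; i++)
        c_list_link_tail(&backlog, &items[i].link);

    drain_current_backlog(&backlog);
    return 0;
}
```
Note one difference from the sketch: the libnm loop moves each entry back onto `priv->obj_changed_lst_head` rather than unlinking it, because the same entries still have to go through the second processing phase that follows the `again:` label.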
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check its RSS in
    `ps aux`.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
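To sample that number without eyeballing the whole `ps aux` table, something
like the following would work (a hypothetical helper, not part of the
original measurement):
```
# print the resident set size (kB) of the running `nmcli monitor`
ps -o rss= -C nmcli
```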
    /* Move all queued object changes onto a temporary list before draining it. */
    c_list_splice(&obj_changed_tmp_lst_head, &priv->obj_changed_lst_head);
    /* Let each changed object update its NMObject counterpart; consuming
     * the change is expected to always succeed at this point. */
    while ((dbobj = c_list_first_entry(&obj_changed_tmp_lst_head, NMLDBusObject, obj_changed_lst))) {
        if (!nml_dbus_object_obj_changed_consume(self, dbobj, NML_DBUS_OBJ_CHANGED_TYPE_NMOBJ)) {
            nm_assert_not_reached();
            continue;
        }
        if (!dbobj->nmobj)
            continue;
        /* The NM manager, Settings, and DnsManager objects are all backed by
         * the NMClient instance itself; their cached properties are notified
         * directly. Every other object dispatches to its NMObject's
         * obj_changed_notify(). */
        if (dbobj->nmobj == G_OBJECT(self)) {
            if (dbobj == priv->dbobj_nm) {
                nml_dbus_property_o_notify_changed_many(priv->nm.property_o,
                                                        G_N_ELEMENTS(priv->nm.property_o),
                                                        self);
                nml_dbus_property_ao_notify_changed_many(priv->nm.property_ao,
                                                         G_N_ELEMENTS(priv->nm.property_ao),
                                                         self);
            } else if (dbobj == priv->dbobj_settings)
                nml_dbus_property_ao_notify_changed(&priv->settings.connections, self);
            else
                nm_assert(dbobj == priv->dbobj_dns_manager);
        } else
            NM_OBJECT_GET_CLASS(dbobj->nmobj)->obj_changed_notify(NM_OBJECT(dbobj->nmobj));
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
        _dbobjs_check_dbobj_ready(self, dbobj);
    }
    if (!c_list_is_empty(&priv->obj_changed_lst_head)) {
        nm_assert(nml_dbus_object_obj_changed_any_linked(self, NML_DBUS_OBJ_CHANGED_TYPE_NMOBJ));
        /* we got new changes enqueued. Need to check again. */
        goto again;
    }
}

static void
_dbus_handle_obj_changed_dbus(NMClient *self, const char *log_context)
{
    NMClientPrivate *priv;
    NMLDBusObject   *dbobj;
    CList obj_changed_tmp_lst_head = C_LIST_INIT(obj_changed_tmp_lst_head);

    priv = NM_CLIENT_GET_PRIVATE(self);

    /* We move the changed list onto a temporary list and consume that.
     * Note that nml_dbus_object_obj_changed_consume() will move the object
     * back to the original list if there are changes of another type.
     *
     * This is done so that we can enqueue more changes while processing the
     * change list. */
    c_list_splice(&obj_changed_tmp_lst_head, &priv->obj_changed_lst_head);

    while (
        (dbobj = c_list_first_entry(&obj_changed_tmp_lst_head, NMLDBusObject, obj_changed_lst))) {
        nm_auto_unref_nml_dbusobj NMLDBusObject *dbobj_unref = NULL;

        if (!nml_dbus_object_obj_changed_consume(self, dbobj, NML_DBUS_OBJ_CHANGED_TYPE_DBUS))
            continue;

        nm_assert(dbobj->obj_state >= NML_DBUS_OBJ_STATE_ON_DBUS);

        dbobj_unref = nml_dbus_object_ref(dbobj);

        _obj_handle_dbus_changes(self, dbobj);

        if (dbobj->obj_state == NML_DBUS_OBJ_STATE_UNLINKED)
            continue;

        if (c_list_is_empty(&dbobj->iface_lst_head) && c_list_is_empty(&dbobj->watcher_lst_head)) {
            NML_NMCLIENT_LOG_T(self, "[%s]: drop D-Bus instance", dbobj->dbus_path->str);
            nml_dbus_object_set_obj_state(dbobj, NML_DBUS_OBJ_STATE_UNLINKED, self);
            if (!g_hash_table_steal(priv->dbus_objects, dbobj))
                nm_assert_not_reached();
            nml_dbus_object_unref(dbobj);
        }
    }

    /* D-Bus changes can only be enqueued in an earlier stage. We don't expect
     * any more changes of type D-Bus at this point. */
    nm_assert(!nml_dbus_object_obj_changed_any_linked(self, NML_DBUS_OBJ_CHANGED_TYPE_DBUS));
}

static void
_dbus_handle_changes_commit(NMClient *self, gboolean allow_init_start_check_complete)
{
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
    nm_auto_pop_gmaincontext GMainContext *dbus_context = NULL;

    _dbus_handle_obj_changed_nmobj(self);

    /* Emit the collected notifications with the client's main context pushed
     * as thread default, so user callbacks run in the expected context. */
    dbus_context = nm_g_main_context_push_thread_default_if_necessary(priv->main_context);

    _nm_client_notify_event_emit(self);

    _set_nm_running(self, FALSE);
    if (allow_init_start_check_complete)
        _init_start_check_complete(self);
}
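
/* Change handling is split into distinct steps: first the D-Bus message is
 * unpacked and the changed data recorded, then the GObject instances are
 * updated without emitting any signals (_dbus_handle_obj_changed_dbus()),
 * and finally all collected signals and notifications are emitted while the
 * cache is in a consistent state (_dbus_handle_changes_commit()). */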
static void
_dbus_handle_changes(NMClient   *self,
                     const char *log_context,
                     gboolean    allow_init_start_check_complete)
{
    _dbus_handle_obj_changed_dbus(self, log_context);
    _dbus_handle_changes_commit(self, allow_init_start_check_complete);
}
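
/* Step one of change handling: unpack a PropertiesChanged payload for one
 * object path and interface, and record the changed data so that it can be
 * applied to the GObject instance and committed later. */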
static gboolean
_dbus_handle_properties_changed(NMClient       *self,
                                const char     *log_context,
                                const char     *object_path,
                                const char     *interface_name,
                                gboolean        allow_add_iface,
                                GVariant       *changed_properties,
2019-10-30 11:42:58 +01:00
|
|
|
NMLDBusObject **inout_dbobj)
|
|
|
|
|
{
|
2021-11-09 13:28:54 +01:00
|
|
|
NMLDBusObject *dbobj = NULL;
|
|
|
|
|
NMLDBusObjIfaceData *db_iface_data = NULL;
|
|
|
|
|
nm_auto_ref_string NMRefString *dbus_path = NULL;
|
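/* Annotation (not part of the original source): nm_auto_ref_string and the
 * gs_free/gs_unref_* macros seen further below are cleanup attributes; the
 * variables they decorate are unref'd/freed automatically on scope exit. */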
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
nm_assert(!changed_properties
|
|
|
|
|
|| g_variant_is_of_type(changed_properties, G_VARIANT_TYPE("a{sv}")));
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
{
|
|
|
|
|
gs_free char *ss = NULL;
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
NML_NMCLIENT_LOG_T(self,
|
2021-02-15 09:11:42 +01:00
|
|
|
"[%s]: %s: properties changed for interface %s %s%s%s",
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check the RSS size
in the `ps aux` output.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
              object_path,
              log_context,
              interface_name,
2021-02-15 09:11:42 +01:00
              NM_PRINT_FMT_QUOTED(changed_properties,
                                  "{ ",
                                  (ss = g_variant_print(changed_properties, TRUE)),
                                  " }",
                                  "(no changed properties)"));
2019-10-30 11:42:58 +01:00
    if (inout_dbobj) {
        dbobj = *inout_dbobj;
        nm_assert(!dbobj || nm_streq(object_path, dbobj->dbus_path->str));
    }
    if (!dbobj) {
        dbus_path = nm_ref_string_new(object_path);
        dbobj     = _dbobjs_dbobj_get_r(self, dbus_path);
    }
2020-09-28 16:03:33 +02:00
libnm: fix tracking object state in NMClient cache
NMClient has an NMLDBusObject instance for each D-Bus object that it
sees. Such an object can be in different states: for example, it was
already seen on D-Bus, or it is so far only referred to by a property
of another object. Due to a bug, we would wrongly fail to update this
state and later trigger an assertion.
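To make the state handling concrete, here is a minimal, self-contained
sketch of the intended transitions. It is not the libnm implementation:
only the NML_DBUS_OBJ_STATE_WATCHED_ONLY and NML_DBUS_OBJ_STATE_ON_DBUS
names match the logs and the assertion below; every other identifier is
hypothetical.
```c
#include <assert.h>
#include <stdio.h>

typedef enum {
    NML_DBUS_OBJ_STATE_UNLINKED,     /* hypothetical: not seen at all yet */
    NML_DBUS_OBJ_STATE_WATCHED_ONLY, /* "watched-only" in the logs below */
    NML_DBUS_OBJ_STATE_ON_DBUS,      /* required by the assertion below */
} NMLDBusObjState;

typedef struct {
    const char     *path;
    NMLDBusObjState obj_state;
} ObjSketch;

/* Another object's property refers to @obj by object path: the cache
 * starts watching it, although @obj itself is not (yet) on D-Bus. */
static void
ref_by_property(ObjSketch *obj)
{
    if (obj->obj_state < NML_DBUS_OBJ_STATE_WATCHED_ONLY)
        obj->obj_state = NML_DBUS_OBJ_STATE_WATCHED_ONLY;
}

/* InterfacesAdded announces @obj itself: promote it to on-dbus.
 * The bug was, in effect, that this promotion did not happen for
 * objects that were first seen via a property reference. */
static void
interfaces_added(ObjSketch *obj)
{
    obj->obj_state = NML_DBUS_OBJ_STATE_ON_DBUS;
}

int
main(void)
{
    ObjSketch ac = {
        "/org/freedesktop/NetworkManager/ActiveConnection/0",
        NML_DBUS_OBJ_STATE_UNLINKED,
    };

    ref_by_property(&ac);  /* Device.ActiveConnection points at it */
    interfaces_added(&ac); /* the object itself appears on D-Bus */

    /* the check that fails in _dbus_handle_obj_changed_dbus() */
    assert(ac.obj_state >= NML_DBUS_OBJ_STATE_ON_DBUS);
    printf("state ok\n");
    return 0;
}
```
With the promotion in interfaces_added() skipped, the assert above is
exactly the check that fires in the backtrace further down.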
Reproduce with python-dbusmock (commit e89e28bf1bc0254a1eb71b71cf68ef7a97d11e5b)
by running `pytest -v -s tests/test_networkmanager.py -k test_one_wifi_with_accesspoints`.
With LIBNM_CLIENT_DEBUG we get:
>>> libnm-dbus[96085]: <trace> [6464.06459] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: properties-changed: properties changed for interface org.freedesktop.NetworkManager.Device { {'ActiveConnection': <objectpath '/org/freedesktop/NetworkManager/ActiveConnection/0'>} }
libnm-dbus[96085]: <trace> [6464.06459] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: properties-changed: set property org.freedesktop.NetworkManager.Device.ActiveConnection
libnm-dbus[96085]: <trace> [6464.06459] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x01 linked
libnm-dbus[96085]: <trace> [6464.06459] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x01 consumed
>>> libnm-dbus[96085]: <trace> [6464.06459] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: set D-Bus object state watched-only
libnm-dbus[96085]: <trace> [6464.06459] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x02 linked
libnm-dbus[96085]: <trace> [6464.06459] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager]: changed-type 0x02 linked
libnm-dbus[96085]: <trace> [6464.06459] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x02 consumed
>>> libnm-dbus[96085]: <error> [6464.06459] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: property ActiveConnection references /org/freedesktop/NetworkManager/ActiveConnection/0 but object is not present on D-Bus
libnm-dbus[96085]: <trace> [6464.06459] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager]: changed-type 0x02 consumed
libnm-dbus[96085]: <trace> [6464.06460] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: properties-changed: properties changed for interface org.freedesktop.NetworkManager.Device { {'State': <uint32 100>} }
libnm-dbus[96085]: <trace> [6464.06460] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: properties-changed: set property org.freedesktop.NetworkManager.Device.State
libnm-dbus[96085]: <trace> [6464.06460] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x01 linked
libnm-dbus[96085]: <trace> [6464.06460] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x01 consumed
libnm-dbus[96085]: <trace> [6464.06460] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x02 linked
libnm-dbus[96085]: <trace> [6464.06460] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager]: changed-type 0x02 linked
libnm-dbus[96085]: <trace> [6464.06461] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x02 consumed
libnm-dbus[96085]: <trace> [6464.06461] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager]: changed-type 0x02 consumed
libnm-dbus[96085]: <trace> [6464.06462] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: properties-changed: properties changed for interface org.freedesktop.NetworkManager.Device { {'StateReason': <(uint32 100, uint32 0)>} }
libnm-dbus[96085]: <trace> [6464.06462] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: properties-changed: set property org.freedesktop.NetworkManager.Device.StateReason
libnm-dbus[96085]: <trace> [6464.06462] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x01 linked
libnm-dbus[96085]: <trace> [6464.06462] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x01 consumed
libnm-dbus[96085]: <trace> [6464.06462] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x02 linked
libnm-dbus[96085]: <trace> [6464.06462] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager]: changed-type 0x02 linked
libnm-dbus[96085]: <trace> [6464.06462] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x02 consumed
libnm-dbus[96085]: <trace> [6464.06462] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager]: changed-type 0x02 consumed
>>> libnm-dbus[96085]: <trace> [6464.06465] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: properties changed for interface org.freedesktop.NetworkManager.Connection.Active { {'Devices': <[objectpath '/org/freedesktop/NetworkManager/Devices/mock_WiFi2']>, 'Default6': <false>, 'Default': <true>, 'Type': <'802-11-wireless'>, 'Vpn': <false>, 'Connection': <objectpath '/org/freedesktop/NetworkManager/Settings/Mock_AP3'>, 'Master': <objectpath '/'>, 'SpecificObject': <objectpath '/org/freedesktop/NetworkManager/AccessPoint/Mock_AP3'>, 'Uuid': <'72757a57-8cb6-4052-a18f-4e2be4ba27d9'>, 'State': <uint32 2>, 'Id': <'AP_3'>} }
>>> here we lack "set D-Bus object state on-dbus"
libnm-dbus[96085]: <trace> [6464.06465] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.Devices
libnm-dbus[96085]: <trace> [6464.06465] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.Default6
libnm-dbus[96085]: <trace> [6464.06465] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.Default
libnm-dbus[96085]: <trace> [6464.06465] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.Type
libnm-dbus[96085]: <trace> [6464.06465] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.Vpn
libnm-dbus[96085]: <trace> [6464.06465] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.Connection
libnm-dbus[96085]: <trace> [6464.06465] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.Master
libnm-dbus[96085]: <trace> [6464.06465] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.SpecificObject
libnm-dbus[96085]: <trace> [6464.06466] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.Uuid
libnm-dbus[96085]: <trace> [6464.06466] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.State
libnm-dbus[96085]: <trace> [6464.06466] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.Id
libnm-dbus[96085]: <trace> [6464.06466] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: changed-type 0x01 linked
libnm-dbus[96085]: <trace> [6464.06466] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: changed-type 0x01 consumed
Bail out! libnm:ERROR:libnm/nm-client.c:2863:_dbus_handle_obj_changed_dbus: assertion failed: (dbobj->obj_state >= NML_DBUS_OBJ_STATE_ON_DBUS)
Backtrace:
#3 0x00007f0bd11173bf in g_assertion_message_expr
(domain=domain@entry=0x7f0bd1576018 "libnm", file=file@entry=0x7f0bd1576006 "libnm/nm-client.c", line=line@entry=2863, func=func@entry=0x7f0bd157f1b0 <__func__.170> "_dbus_handle_obj_changed_dbus", expr=expr@entry=0x7f0bd157cba0 "dbobj->obj_state >= NML_DBUS_OBJ_STATE_ON_DBUS") at ../glib/gtestutils.c:2963
#4 0x00007f0bd14959dd in _dbus_handle_obj_changed_dbus (self=self@entry=0x5612d4f5a130, log_context=<optimized out>) at libnm/nm-client.c:2863
#5 0x00007f0bd1495c29 in _dbus_handle_changes (self=self@entry=0x5612d4f5a130, log_context=<optimized out>, allow_init_start_check_complete=allow_init_start_check_complete@entry=1)
at libnm/nm-client.c:2909
#6 0x00007f0bd1497e56 in _dbus_managed_objects_changed_cb
(connection=<optimized out>, sender_name=<optimized out>, arg_object_path=<optimized out>, interface_name=<optimized out>, signal_name=<optimized out>, parameters=0x7f0bb800d720, user_data=0x5612d4f5a130) at libnm/nm-client.c:3172
#7 0x00007f0bd132a8df in emit_signal_instance_in_idle_cb (data=data@entry=0x7f0bb8003700) at ../gio/gdbusconnection.c:3789
#8 0x00007f0bd10f1b5b in g_idle_dispatch (source=source@entry=0x7f0bb8012260, callback=0x7f0bd132a860 <emit_signal_instance_in_idle_cb>, user_data=0x7f0bb8003700) at ../glib/gmain.c:5836
#9 0x00007f0bd10f2a9f in g_main_dispatch (context=0x5612d4f4b630) at ../glib/gmain.c:3325
#10 g_main_context_dispatch (context=0x5612d4f4b630) at ../glib/gmain.c:4043
#11 0x00007f0bd1144a98 in g_main_context_iterate.constprop.0 (context=0x5612d4f4b630, block=block@entry=1, dispatch=dispatch@entry=1, self=<optimized out>) at ../glib/gmain.c:4119
#12 0x00007f0bd10f2163 in g_main_loop_run (loop=0x5612d4f4b720) at ../glib/gmain.c:4317
#13 0x00005612d44b6543 in main (argc=7, argv=0x7fff4414f1d8) at clients/cli/nmcli.c:1036
https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=982613
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/issues/662
Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')
(cherry picked from commit e1e9abdf041b4cc95fb1936b75ced7669f3d7867)
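For context on frames #6-#8 of the backtrace: GDBus delivers subscribed
signals from an idle source on the subscriber's main context, which is
why the call chain passes through emit_signal_instance_in_idle_cb()
before reaching the InterfacesAdded handler. A minimal sketch of such a
subscription with a plain GDBusConnection follows; it assumes
NetworkManager's ObjectManager path /org/freedesktop and is
illustrative, not the actual libnm code.
```c
#include <gio/gio.h>

/* Illustrative handler; the real libnm callback is
 * _dbus_managed_objects_changed_cb() in nm-client.c (frame #6). */
static void
managed_objects_changed_cb(GDBusConnection *connection,
                           const char      *sender_name,
                           const char      *object_path,
                           const char      *interface_name,
                           const char      *signal_name,
                           GVariant        *parameters,
                           gpointer         user_data)
{
    g_print("%s from %s at %s\n", signal_name, sender_name, object_path);
}

int
main(void)
{
    g_autoptr(GError) error = NULL;
    GDBusConnection  *bus   = g_bus_get_sync(G_BUS_TYPE_SYSTEM, NULL, &error);
    GMainLoop        *loop;

    if (!bus) {
        g_printerr("failed to get bus: %s\n", error->message);
        return 1;
    }

    /* Matching signals are dispatched via an idle source on this
     * thread's default main context (see frames #7-#10 above). */
    g_dbus_connection_signal_subscribe(bus,
                                       "org.freedesktop.NetworkManager",
                                       "org.freedesktop.DBus.ObjectManager",
                                       NULL, /* InterfacesAdded and -Removed */
                                       "/org/freedesktop",
                                       NULL,
                                       G_DBUS_SIGNAL_FLAGS_NONE,
                                       managed_objects_changed_cb,
                                       NULL,
                                       NULL);

    loop = g_main_loop_new(NULL, FALSE);
    g_main_loop_run(loop);
    return 0;
}
```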
2021-02-15 09:17:07 +01:00
    if (dbobj) {
        nm_assert(dbobj->obj_state >= NML_DBUS_OBJ_STATE_WATCHED_ONLY);
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
    the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
db_iface_data = nml_dbus_object_iface_data_get(dbobj, interface_name, allow_add_iface);
libnm: fix tracking object state in NMClient cache
NMClient has an NMLDBusObject instance for each D-Bus object
that it sees. Such an object can be in different states: for example,
we already saw it on D-Bus, or it is so far only referred to by a
property of another object. Due to a bug, we would wrongly fail to
update the state and thus trigger an assertion failure.
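For illustration, a minimal sketch of the state progression follows. Only
NML_DBUS_OBJ_STATE_WATCHED_ONLY and NML_DBUS_OBJ_STATE_ON_DBUS are named in
this message, and their relative order is implied by the failing assertion
(`dbobj->obj_state >= NML_DBUS_OBJ_STATE_ON_DBUS`); everything else below is
an assumption for readability, not the verbatim enum from libnm/nm-client.c:
```
/* Hypothetical sketch of NMLDBusObject's state progression. Only
 * WATCHED_ONLY and ON_DBUS are named in this commit message; their order
 * follows from the assertion (obj_state >= NML_DBUS_OBJ_STATE_ON_DBUS). */
typedef enum {
    NML_DBUS_OBJ_STATE_WATCHED_ONLY, /* only referenced by a property of another object */
    NML_DBUS_OBJ_STATE_ON_DBUS,      /* actually seen on D-Bus (GetManagedObjects/InterfacesAdded) */
    /* the real enum continues with further "ready" states (assumption) */
} NMLDBusObjState;
```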
Reproduce with python-dbusmock (commit e89e28bf1bc0254a1eb71b71cf68ef7a97d11e5b)
by running `pytest -v -s tests/test_networkmanager.py -k test_one_wifi_with_accesspoints`.
With LIBNM_CLIENT_DEBUG we get:
>>> libnm-dbus[96085]: <trace> [6464.06459] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: properties-changed: properties changed for interface org.freedesktop.NetworkManager.Device { {'ActiveConnection': <objectpath '/org/freedesktop/NetworkManager/ActiveConnection/0'>} }
libnm-dbus[96085]: <trace> [6464.06459] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: properties-changed: set property org.freedesktop.NetworkManager.Device.ActiveConnection
libnm-dbus[96085]: <trace> [6464.06459] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x01 linked
libnm-dbus[96085]: <trace> [6464.06459] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x01 consumed
>>> libnm-dbus[96085]: <trace> [6464.06459] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: set D-Bus object state watched-only
libnm-dbus[96085]: <trace> [6464.06459] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x02 linked
libnm-dbus[96085]: <trace> [6464.06459] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager]: changed-type 0x02 linked
libnm-dbus[96085]: <trace> [6464.06459] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x02 consumed
>>> libnm-dbus[96085]: <error> [6464.06459] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: property ActiveConnection references /org/freedesktop/NetworkManager/ActiveConnection/0 but object is not present on D-Bus
libnm-dbus[96085]: <trace> [6464.06459] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager]: changed-type 0x02 consumed
libnm-dbus[96085]: <trace> [6464.06460] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: properties-changed: properties changed for interface org.freedesktop.NetworkManager.Device { {'State': <uint32 100>} }
libnm-dbus[96085]: <trace> [6464.06460] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: properties-changed: set property org.freedesktop.NetworkManager.Device.State
libnm-dbus[96085]: <trace> [6464.06460] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x01 linked
libnm-dbus[96085]: <trace> [6464.06460] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x01 consumed
libnm-dbus[96085]: <trace> [6464.06460] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x02 linked
libnm-dbus[96085]: <trace> [6464.06460] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager]: changed-type 0x02 linked
libnm-dbus[96085]: <trace> [6464.06461] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x02 consumed
libnm-dbus[96085]: <trace> [6464.06461] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager]: changed-type 0x02 consumed
libnm-dbus[96085]: <trace> [6464.06462] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: properties-changed: properties changed for interface org.freedesktop.NetworkManager.Device { {'StateReason': <(uint32 100, uint32 0)>} }
libnm-dbus[96085]: <trace> [6464.06462] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: properties-changed: set property org.freedesktop.NetworkManager.Device.StateReason
libnm-dbus[96085]: <trace> [6464.06462] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x01 linked
libnm-dbus[96085]: <trace> [6464.06462] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x01 consumed
libnm-dbus[96085]: <trace> [6464.06462] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x02 linked
libnm-dbus[96085]: <trace> [6464.06462] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager]: changed-type 0x02 linked
libnm-dbus[96085]: <trace> [6464.06462] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/Devices/mock_WiFi2]: changed-type 0x02 consumed
libnm-dbus[96085]: <trace> [6464.06462] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager]: changed-type 0x02 consumed
>>> libnm-dbus[96085]: <trace> [6464.06465] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: properties changed for interface org.freedesktop.NetworkManager.Connection.Active { {'Devices': <[objectpath '/org/freedesktop/NetworkManager/Devices/mock_WiFi2']>, 'Default6': <false>, 'Default': <true>, 'Type': <'802-11-wireless'>, 'Vpn': <false>, 'Connection': <objectpath '/org/freedesktop/NetworkManager/Settings/Mock_AP3'>, 'Master': <objectpath '/'>, 'SpecificObject': <objectpath '/org/freedesktop/NetworkManager/AccessPoint/Mock_AP3'>, 'Uuid': <'72757a57-8cb6-4052-a18f-4e2be4ba27d9'>, 'State': <uint32 2>, 'Id': <'AP_3'>} }
>>> here the expected "set D-Bus object state on-dbus" line is missing
libnm-dbus[96085]: <trace> [6464.06465] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.Devices
libnm-dbus[96085]: <trace> [6464.06465] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.Default6
libnm-dbus[96085]: <trace> [6464.06465] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.Default
libnm-dbus[96085]: <trace> [6464.06465] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.Type
libnm-dbus[96085]: <trace> [6464.06465] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.Vpn
libnm-dbus[96085]: <trace> [6464.06465] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.Connection
libnm-dbus[96085]: <trace> [6464.06465] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.Master
libnm-dbus[96085]: <trace> [6464.06465] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.SpecificObject
libnm-dbus[96085]: <trace> [6464.06466] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.Uuid
libnm-dbus[96085]: <trace> [6464.06466] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.State
libnm-dbus[96085]: <trace> [6464.06466] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: interfaces-added: set property org.freedesktop.NetworkManager.Connection.Active.Id
libnm-dbus[96085]: <trace> [6464.06466] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: changed-type 0x01 linked
libnm-dbus[96085]: <trace> [6464.06466] nmclient[c9bf1eaa1f4b6c99]: [/org/freedesktop/NetworkManager/ActiveConnection/0]: changed-type 0x01 consumed
Bail out! libnm:ERROR:libnm/nm-client.c:2863:_dbus_handle_obj_changed_dbus: assertion failed: (dbobj->obj_state >= NML_DBUS_OBJ_STATE_ON_DBUS)
Backtrace:
#3 0x00007f0bd11173bf in g_assertion_message_expr
(domain=domain@entry=0x7f0bd1576018 "libnm", file=file@entry=0x7f0bd1576006 "libnm/nm-client.c", line=line@entry=2863, func=func@entry=0x7f0bd157f1b0 <__func__.170> "_dbus_handle_obj_changed_dbus", expr=expr@entry=0x7f0bd157cba0 "dbobj->obj_state >= NML_DBUS_OBJ_STATE_ON_DBUS") at ../glib/gtestutils.c:2963
#4 0x00007f0bd14959dd in _dbus_handle_obj_changed_dbus (self=self@entry=0x5612d4f5a130, log_context=<optimized out>) at libnm/nm-client.c:2863
#5 0x00007f0bd1495c29 in _dbus_handle_changes (self=self@entry=0x5612d4f5a130, log_context=<optimized out>, allow_init_start_check_complete=allow_init_start_check_complete@entry=1)
at libnm/nm-client.c:2909
#6 0x00007f0bd1497e56 in _dbus_managed_objects_changed_cb
(connection=<optimized out>, sender_name=<optimized out>, arg_object_path=<optimized out>, interface_name=<optimized out>, signal_name=<optimized out>, parameters=0x7f0bb800d720, user_data=0x5612d4f5a130) at libnm/nm-client.c:3172
#7 0x00007f0bd132a8df in emit_signal_instance_in_idle_cb (data=data@entry=0x7f0bb8003700) at ../gio/gdbusconnection.c:3789
#8 0x00007f0bd10f1b5b in g_idle_dispatch (source=source@entry=0x7f0bb8012260, callback=0x7f0bd132a860 <emit_signal_instance_in_idle_cb>, user_data=0x7f0bb8003700) at ../glib/gmain.c:5836
#9 0x00007f0bd10f2a9f in g_main_dispatch (context=0x5612d4f4b630) at ../glib/gmain.c:3325
#10 g_main_context_dispatch (context=0x5612d4f4b630) at ../glib/gmain.c:4043
#11 0x00007f0bd1144a98 in g_main_context_iterate.constprop.0 (context=0x5612d4f4b630, block=block@entry=1, dispatch=dispatch@entry=1, self=<optimized out>) at ../glib/gmain.c:4119
#12 0x00007f0bd10f2163 in g_main_loop_run (loop=0x5612d4f4b720) at ../glib/gmain.c:4317
#13 0x00005612d44b6543 in main (argc=7, argv=0x7fff4414f1d8) at clients/cli/nmcli.c:1036
https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=982613
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/issues/662
Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')
(cherry picked from commit e1e9abdf041b4cc95fb1936b75ced7669f3d7867)
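Pieced together from the blame-annotated hunks that follow, the fixed code
path reads roughly as below. This is a reconstruction for readability: the
enclosing `if (dbobj) {` line does not appear in this excerpt and is assumed
from context, while the remaining statements are taken verbatim from the
hunks.
```
/* Reconstruction; the "if (dbobj) {" opener is assumed from context. */
if (dbobj) {
    db_iface_data = nml_dbus_object_iface_data_get(dbobj, interface_name, allow_add_iface);
    if (db_iface_data && dbobj->obj_state == NML_DBUS_OBJ_STATE_WATCHED_ONLY)
        /* the fix: a so-far watched-only object turns out to be present on D-Bus */
        nml_dbus_object_set_obj_state(dbobj, NML_DBUS_OBJ_STATE_ON_DBUS, self);
} else if (allow_add_iface) {
    dbobj = _dbobjs_dbobj_create(self, g_steal_pointer(&dbus_path));
    nml_dbus_object_set_obj_state(dbobj, NML_DBUS_OBJ_STATE_ON_DBUS, self);
    db_iface_data = nml_dbus_object_iface_data_get(dbobj, interface_name, TRUE);
    nm_assert(db_iface_data);
}
NM_SET_OUT(inout_dbobj, dbobj);
```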
2021-02-15 09:17:07 +01:00
if (db_iface_data && dbobj->obj_state == NML_DBUS_OBJ_STATE_WATCHED_ONLY)
nml_dbus_object_set_obj_state(dbobj, NML_DBUS_OBJ_STATE_ON_DBUS, self);
} else if (allow_add_iface) {
2019-10-30 11:42:58 +01:00
dbobj = _dbobjs_dbobj_create(self, g_steal_pointer(&dbus_path));
nml_dbus_object_set_obj_state(dbobj, NML_DBUS_OBJ_STATE_ON_DBUS, self);
db_iface_data = nml_dbus_object_iface_data_get(dbobj, interface_name, TRUE);
2021-02-15 09:59:53 +01:00
nm_assert(db_iface_data);
2019-10-30 11:42:58 +01:00
}
2019-10-30 11:42:58 +01:00
|
|
|
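    /* Hand the object back through the optional out parameter; NM_SET_OUT()
     * only dereferences inout_dbobj when the caller passed a non-NULL pointer. */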
    NM_SET_OUT(inout_dbobj, dbobj);
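
    /* No interface data could be resolved: either the object cannot take
     * another interface entry, or a signal referred to an interface that was
     * never added to this object. Log an error and reject the event. */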
    if (!db_iface_data) {
        if (allow_add_iface)
            NML_NMCLIENT_LOG_E(self,
                               "%s: [%s] too many interfaces on object. Something is very wrong",
                               log_context,
                               object_path);
        else
            NML_NMCLIENT_LOG_E(self,
                               "%s: [%s] property changed signal for non existing interface %s",
                               log_context,
                               object_path,
                               interface_name);
        nm_assert(!dbobj || dbobj->obj_state != NML_DBUS_OBJ_STATE_UNLINKED);
        return FALSE;
    }
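
    /* Interfaces that are not well-known to libnm's meta data are merely
     * logged and skipped; for known interfaces, walk the changed properties
     * one by one. */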
    if (!db_iface_data->dbus_iface_is_wellknown)
        NML_NMCLIENT_LOG_W(self,
                           "%s: [%s] ignore unknown interface %s",
                           log_context,
                           object_path,
                           interface_name);
    else if (changed_properties) {
        GVariantIter iter_prop;
        const char  *property_name;
        GVariant    *property_value_tmp;

        g_variant_iter_init(&iter_prop, changed_properties);
        while (g_variant_iter_next(&iter_prop, "{&sv}", &property_name, &property_value_tmp)) {
            _nm_unused gs_unref_variant GVariant *property_value = property_value_tmp;
            const NMLDBusMetaProperty            *meta_property;
            NMLDBusObjPropData                   *db_propdata;
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
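The code below shows the corresponding property handling in nm-client.c:
when a property change is unpacked (or the initial GetManagedObjects()
reply is processed), the property name is first resolved against the
static meta-data, and unknown properties are skipped: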
guint property_idx;

/* Resolve the D-Bus property name against the static meta-data
 * (NMLDBusMetaIface) registered for this interface. */
meta_property = nml_dbus_meta_property_get(db_iface_data->dbus_iface.meta,
                                           property_name,
                                           &property_idx);
if (!meta_property) {
    /* Unknown properties are logged and skipped, never treated as fatal. */
    NML_NMCLIENT_LOG_W(self,
                       "%s: [%s]: ignore unknown property %s.%s",
                       log_context,
                       object_path,
                       interface_name,
                       property_name);
    continue;
}

/* Pick the per-property slot that tracks this property's value. */
db_propdata = &db_iface_data->prop_datas[property_idx];

/* Trace-log whether the property is seen for the first time ("set")
 * or changes an already tracked value ("update"). */
NML_NMCLIENT_LOG_T(self,
                   "[%s]: %s: %s property %s.%s",
                   object_path,
                   log_context,
                   db_propdata->prop_data_value ? "update" : "set",
                   interface_name,
                   property_name);
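These trace messages are disabled by default: LIBNM_CLIENT_DEBUG=trace
enables them, and LIBNM_CLIENT_DEBUG=warning,error combined with
G_DEBUG=fatal-warnings escalates the warning variants into assertions.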
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and from the "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved (see the
fourth example after this list).
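The following examples are illustrative sketches only; they are not part of
this patch. First, the compile-time type check mentioned above for
NML_DBUS_META_PROPERTY_INIT_U() can be approximated with an expression-level
static assert. All names in this sketch (EXAMPLE_META_PROPERTY_INIT_U and
friends) are made up for the illustration and differ from the real macro:
```
#include <glib.h>

typedef struct {
    guint32 version_id;
    char   *hostname;
} ExampleObject;

typedef struct {
    const char *dbus_name;
    gsize       offset;
} ExampleMetaProperty;

/* Expression-level static assert: sizeof(char[-1]) fails to compile. */
#define _EXAMPLE_ASSERT_U32(struct_type, member) \
    (0 * sizeof(char[(sizeof(((struct_type *) NULL)->member) == sizeof(guint32)) ? 1 : -1]))

#define EXAMPLE_META_PROPERTY_INIT_U(name, struct_type, member)  \
    {                                                            \
        .dbus_name = (name),                                     \
        .offset    = G_STRUCT_OFFSET(struct_type, member)        \
                     + _EXAMPLE_ASSERT_U32(struct_type, member), \
    }

static const ExampleMetaProperty example_props[] = {
    /* compiles: version_id is a guint32, matching D-Bus "u" */
    EXAMPLE_META_PROPERTY_INIT_U("VersionId", ExampleObject, version_id),
    /* would not compile: hostname is a char *, not a guint32
     * EXAMPLE_META_PROPERTY_INIT_U("Hostname", ExampleObject, hostname), */
};

int
main(void)
{
    g_print("%s at offset %" G_GSIZE_FORMAT "\n",
            example_props[0].dbus_name, example_props[0].offset);
    return 0;
}
```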
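Second, enabling the debug logging described above requires no code changes.
For example, using nmcli as an arbitrary libnm client:
```
# enable libnm's client-side debug logging
LIBNM_CLIENT_DEBUG=trace nmcli device

# promote selected messages to g_warning()/g_critical() and make them fatal
LIBNM_CLIENT_DEBUG=warning,error G_DEBUG=fatal-warnings nmcli device
```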
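Third, a minimal sketch of a client that observes the emission order
described above ("device-removed", then "notify::devices", then
"device-added"). It assumes a running NetworkManager and omits error
handling:
```
#include <NetworkManager.h>

static void
on_device_removed(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("1) device-removed: %s\n", nm_device_get_iface(device));
}

static void
on_notify_devices(GObject *client, GParamSpec *pspec, gpointer user_data)
{
    g_print("2) notify::devices\n");
}

static void
on_device_added(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("3) device-added: %s\n", nm_device_get_iface(device));
}

int
main(void)
{
    GMainLoop *loop   = g_main_loop_new(NULL, FALSE);
    NMClient  *client = nm_client_new(NULL, NULL);

    g_signal_connect(client, "device-removed", G_CALLBACK(on_device_removed), NULL);
    g_signal_connect(client, "notify::devices", G_CALLBACK(on_notify_devices), NULL);
    g_signal_connect(client, "device-added", G_CALLBACK(on_device_added), NULL);

    g_main_loop_run(loop);
    return 0;
}
```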
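Fourth, a sketch showing where hidden profiles do and do not show up:
nm_client_get_connections() omits them, while an active connection's
nm_active_connection_get_connection() may still return one. Again, error
handling is mostly omitted:
```
#include <NetworkManager.h>

int
main(void)
{
    NMClient        *client = nm_client_new(NULL, NULL);
    const GPtrArray *connections;
    const GPtrArray *active;
    guint            i;

    if (!client)
        return 1;

    /* only profiles visible to the user are listed here */
    connections = nm_client_get_connections(client);
    g_print("%u visible profiles\n", connections->len);

    /* ...but an active connection may still reference a hidden profile */
    active = nm_client_get_active_connections(client);
    for (i = 0; i < active->len; i++) {
        NMActiveConnection *ac = g_ptr_array_index(active, i);
        NMRemoteConnection *rc = nm_active_connection_get_connection(ac);

        g_print("active \"%s\" -> profile %s\n",
                nm_active_connection_get_id(ac),
                rc ? nm_connection_get_uuid(NM_CONNECTION(rc)) : "(unknown)");
    }
    return 0;
}
```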
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long a plain `nmcli` invocation takes on such a system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor

            nm_g_variant_unref(db_propdata->prop_data_value);
            db_propdata->prop_data_value = g_steal_pointer(&property_value);
            nm_c_list_move_tail(&db_iface_data->changed_prop_lst_head,
                                &db_propdata->changed_prop_lst);
        }
    }
    nml_dbus_object_obj_changed_link(self, dbobj, NML_DBUS_OBJ_CHANGED_TYPE_DBUS);
    return TRUE;
}

/* Handle interfaces newly added to a D-Bus object (e.g. from an
 * ObjectManager "InterfacesAdded" signal) and record the resulting
 * changes in the cache. */
static gboolean
_dbus_handle_interface_added(NMClient   *self,
                             const char *log_context,
                             const char *object_path,
                             GVariant   *ifaces)
{
    gboolean     changed = FALSE;
    const char  *interface_name;
    GVariant    *changed_properties;
    GVariantIter iter_ifaces;
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should all be for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping the NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also, before, we constructed NMPropertiesInfo
and had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other metadata are static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them because misuse won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). These are therefore not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and from the "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
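As a minimal illustration of the GMainContext rule above (a sketch, not code
from this patch; all names are made up, only the GLib calls are real): instead
of g_timeout_add(), which always targets g_main_context_default(), create a
GSource and attach it to the explicit context.
```
#include <glib.h>

/* Runs on whichever thread iterates the context we attach to. */
static gboolean
on_timeout(gpointer user_data)
{
    g_main_loop_quit(user_data);
    return G_SOURCE_REMOVE;
}

int
main(void)
{
    GMainContext *context = g_main_context_new();
    GMainLoop    *loop    = g_main_loop_new(context, FALSE);
    GSource      *source  = g_timeout_source_new(100 /* ms */);

    /* Unlike g_timeout_add(), this pins the callback to "context"
     * instead of g_main_context_default(). */
    g_source_set_callback(source, on_timeout, loop, NULL);
    g_source_attach(source, context);
    g_source_unref(source);

    g_main_loop_run(loop);

    g_main_loop_unref(loop);
    g_main_context_unref(context);
    return 0;
}
```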
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
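The code below iterates an InterfacesAdded payload of D-Bus type "a{sa{sv}}".
As background, here is a self-contained GLib sketch (a hypothetical example,
not part of the patch) of the same iteration and its ownership rules: "&s"
borrows the key from the container, while "@a{sv}" returns a new reference
that the caller must release, which is what the gs_unref_variant auto-cleanup
in the real code below takes care of.
```
#include <glib.h>

static void
dump_interfaces(GVariant *ifaces) /* type "a{sa{sv}}" */
{
    GVariantIter iter;
    const char  *interface_name;
    GVariant    *props;

    g_variant_iter_init(&iter, ifaces);
    while (g_variant_iter_next(&iter, "{&s@a{sv}}", &interface_name, &props)) {
        /* "&s" is borrowed (do not free); "@a{sv}" is a new reference. */
        g_print("%s: %u properties\n", interface_name, (guint) g_variant_n_children(props));
        g_variant_unref(props);
    }
}

int
main(void)
{
    GVariant *ifaces = g_variant_parse(G_VARIANT_TYPE("a{sa{sv}}"),
                                       "{'org.example.Iface': {'SomeProp': <42>}}",
                                       NULL,
                                       NULL,
                                       NULL);

    dump_interfaces(ifaces);
    g_variant_unref(ifaces);
    return 0;
}
```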
    nm_assert(g_variant_is_of_type(ifaces, G_VARIANT_TYPE("a{sa{sv}}")));

    g_variant_iter_init(&iter_ifaces, ifaces);
    while (g_variant_iter_next(&iter_ifaces, "{&s@a{sv}}", &interface_name, &changed_properties)) {
        _nm_unused gs_unref_variant GVariant *changed_properties_free = changed_properties;

        if (_dbus_handle_properties_changed(self,
                                            log_context,
                                            object_path,
                                            interface_name,
                                            TRUE,
                                            changed_properties,
                                            NULL))
            changed = TRUE;
    }

    return changed;
}

static gboolean
_dbus_handle_interface_removed(NMClient          *self,
                               const char        *log_context,
                               const char        *object_path,
                               NMLDBusObject    **inout_dbobj,
                               const char *const *removed_interfaces)
{
    gboolean       changed = FALSE;
    NMLDBusObject *dbobj;
    gsize          i;

    if (inout_dbobj && *inout_dbobj) {
        dbobj = *inout_dbobj;
        nm_assert(dbobj == _dbobjs_dbobj_get_s(self, object_path));
    } else {
        dbobj = _dbobjs_dbobj_get_s(self, object_path);
        if (!dbobj) {
            NML_NMCLIENT_LOG_E(self,
                               "%s: [%s]: receive interface removed event for non existing object",
                               log_context,
                               object_path);
            return FALSE;
        }
        NM_SET_OUT(inout_dbobj, dbobj);
    }
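The NM_SET_OUT() call above writes the looked-up object back through the
optional inout pointer. As a rough sketch of that convention (the real macro
lives in NetworkManager's internal macro headers and is more careful about
typing; SET_OUT and lookup_square here are made-up names):
```
#include <glib.h>

/* Stand-in for the NM_SET_OUT() idea: assign through an out-pointer
 * only when the caller actually supplied one. */
#define SET_OUT(out_ptr, value)   \
    G_STMT_START                  \
    {                             \
        if ((out_ptr))            \
            *(out_ptr) = (value); \
    }                             \
    G_STMT_END

static gboolean
lookup_square(int x, int *out_square)
{
    SET_OUT(out_square, x * x); /* safe no-op when out_square is NULL */
    return TRUE;
}

int
main(void)
{
    int sq = 0;

    lookup_square(7, &sq);  /* sq == 49 */
    lookup_square(7, NULL); /* caller does not care about the value */
    g_print("%d\n", sq);
    return 0;
}
```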
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    for (i = 0; removed_interfaces[i]; i++) {
        NMLDBusObjIfaceData *db_iface_data;
        const char          *interface_name = removed_interfaces[i];
        db_iface_data = nml_dbus_object_iface_data_get(dbobj, interface_name, FALSE);
        if (!db_iface_data) {
            NML_NMCLIENT_LOG_E(self,
                               "%s: [%s] receive interface remove event for unexpected interface %s",
                               log_context,
                               object_path,
                               interface_name);
            continue;
        }
        NML_NMCLIENT_LOG_T(self,
                           "%s: [%s] receive interface remove event for interface %s",
                           log_context,
                           object_path,
                           interface_name);
        db_iface_data->iface_removed = TRUE;
        changed                      = TRUE;
    }
    if (changed)
        nml_dbus_object_obj_changed_link(self, dbobj, NML_DBUS_OBJ_CHANGED_TYPE_DBUS);

    return changed;
}
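
The `_dbus_managed_objects_changed_cb()` handler that follows has the shape of
GIO's GDBusSignalCallback. As a minimal, self-contained sketch of how such a
callback is typically registered (illustrative only, not code from nm-client.c;
the handler and helper names here are hypothetical), one would subscribe to the
ObjectManager signals of the NetworkManager name roughly like this:
```
#include <gio/gio.h>

/* Hypothetical handler with the GDBusSignalCallback shape; the name is
 * illustrative and not taken from nm-client.c. */
static void
on_object_manager_signal(GDBusConnection *connection,
                         const char      *sender_name,
                         const char      *object_path,
                         const char      *interface_name,
                         const char      *signal_name,
                         GVariant        *parameters,
                         gpointer         user_data)
{
    g_print("%s: %s at %s\n", interface_name, signal_name, object_path);
}

/* Subscribe to both InterfacesAdded and InterfacesRemoved by passing NULL
 * for the member. Signals are delivered in the GMainContext that is
 * thread-default at subscription time, which matches the commit message's
 * point about sticking to one main context. */
static guint
subscribe_object_manager_signals(GDBusConnection *connection, gpointer user_data)
{
    return g_dbus_connection_signal_subscribe(connection,
                                              "org.freedesktop.NetworkManager",
                                              "org.freedesktop.DBus.ObjectManager",
                                              NULL /* member */,
                                              NULL /* object_path */,
                                              NULL /* arg0 */,
                                              G_DBUS_SIGNAL_FLAGS_NONE,
                                              on_object_manager_signal,
                                              user_data,
                                              NULL /* user_data_free_func */);
}
```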
static void
_dbus_managed_objects_changed_cb(GDBusConnection *connection,
                                 const char      *sender_name,
                                 const char      *arg_object_path,
                                 const char      *interface_name,
                                 const char      *signal_name,
                                 GVariant        *parameters,
                                 gpointer         user_data)
{
    NMClient        *self = user_data;
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
    const char      *log_context;
    gboolean         changed;

    nm_assert(nm_streq0(interface_name, DBUS_INTERFACE_OBJECT_MANAGER));

    if (priv->get_managed_objects_cancellable) {
        /* we still wait for the initial GetManagedObjects(). Ignore the event. */
        return;
    }
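
    /* From here the handler dispatches on signal_name. As described in the
     * commit message above, this step only unpacks and records the changes;
     * GObject updates and signal emission follow later, via the separate
     * _dbus_handle_obj_changed_dbus() and _dbus_handle_changes_commit() steps. */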

    if (nm_streq(signal_name, "InterfacesAdded")) {
        gs_unref_variant GVariant *interfaces_and_properties = NULL;
        const char                *object_path;
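
An ObjectManager "InterfacesAdded" signal carries a payload of D-Bus type
(oa{sa{sv}}): the object path plus a dictionary mapping interface names to
property dictionaries. As a minimal sketch of unpacking such a payload with
GVariant (illustrative only, not taken from nm-client.c; the function name is
hypothetical):
```
#include <gio/gio.h>

/* Hypothetical example, not from nm-client.c: unpack an ObjectManager
 * "InterfacesAdded" payload of type (oa{sa{sv}}). */
static void
example_unpack_interfaces_added(GVariant *parameters)
{
    const char          *object_path;
    g_autoptr(GVariant)  interfaces_and_properties = NULL;
    GVariantIter         iter;
    const char          *ifname;
    GVariant            *props;

    /* "&o" borrows the object path from @parameters; "@a{sa{sv}}" takes a
     * reference on the interface/property dictionary. */
    g_variant_get(parameters, "(&o@a{sa{sv}})", &object_path, &interfaces_and_properties);

    g_variant_iter_init(&iter, interfaces_and_properties);
    while (g_variant_iter_next(&iter, "{&s@a{sv}}", &ifname, &props)) {
        g_print("added interface %s on %s\n", ifname, object_path);
        g_variant_unref(props);
    }
}
```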
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
    rm -rf /tmp/nm-test && \
    git checkout -B test 4fad8c7c642e && \
    git clean -fdx && \
    ./autogen.sh --prefix=/tmp/nm-test && \
    make -j 5 install && \
    make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
    NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
    for i in {1..10}; do
        NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
    done
    echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long a plain `nmcli` invocation takes on the system.
```
do_cleanup() {
    for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
        nmcli connection delete uuid "$UUID"
    done
    for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
        nmcli device delete "$DEVICE"
    done
}
do_setup() {
    do_cleanup
    for i in {1..30}; do
        nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
        for j in $(seq $i 30); do
            nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
        done
    done
    systemctl restart NetworkManager.service
    sleep 5
}
do_test() {
    perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
        /* "InterfacesAdded": validate the D-Bus type before unpacking. */
        if (!g_variant_is_of_type(parameters, G_VARIANT_TYPE("(oa{sa{sv}})")))
            return;

        g_variant_get(parameters, "(&o@a{sa{sv}})", &object_path, &interfaces_and_properties);

        log_context = "interfaces-added";
        changed =
            _dbus_handle_interface_added(self, log_context, object_path, interfaces_and_properties);
        goto out;
    }
    /* "InterfacesRemoved": drop the listed interfaces from the cache. */
    if (nm_streq(signal_name, "InterfacesRemoved")) {
        gs_free const char **interfaces = NULL;
        const char          *object_path;
        if (!g_variant_is_of_type(parameters, G_VARIANT_TYPE("(oas)")))
            return;
        g_variant_get(parameters, "(&o^a&s)", &object_path, &interfaces);
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Previously we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other metadata are static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure that the property types match. It's pretty
hard to misuse them, because misuse won't compile (see sketch 4 after this
list).
- The metadata now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type; it didn't know the expected D-Bus type (see sketch 5 after this list).
- Previously, GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify:devices", and finally "device-added"
signals.
This changes behavior relative to commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden by NMClient's
nm_client_get_connections() and by its "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved (see
sketch 6 after this list).
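The following sketches illustrate some of the points above. Sketch 1: a
minimal, hypothetical example (not libnm code) of the main-context rule.
Instead of g_idle_add(), which targets g_main_context_default(), work is
scheduled on an explicitly chosen context:
```c
#include <glib.h>

static gboolean
my_callback(gpointer user_data)
{
    /* Runs only when the chosen context is iterated by its owner. */
    return G_SOURCE_REMOVE;
}

static void
schedule_on_context(GMainContext *context)
{
    GSource *source;

    /* The explicit-context equivalent of g_idle_add(): */
    source = g_idle_source_new();
    g_source_set_callback(source, my_callback, NULL, NULL);
    g_source_attach(source, context);
    g_source_unref(source);
}
```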
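Sketch 2: synchronous initialization via the public nm_client_new() API.
The call blocks, but per the point above NMClient still operates fully
asynchronously underneath. The surrounding helper name is made up:
```c
#include <NetworkManager.h>

static NMClient *
create_client_sync(void)
{
    GError   *error = NULL;
    NMClient *client;

    /* Blocks until the initial D-Bus state was fetched; internally this
     * is still served asynchronously on NMClient's own main context. */
    client = nm_client_new(NULL, &error);
    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_clear_error(&error);
    }
    return client;
}
```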
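Sketch 3: the three-step message processing, reduced to a toy cache. All
names here are made up for illustration; the real code paths are the
_dbus_handle_*() functions named above. The point is that notifications
are only emitted after every recorded change was applied:
```c
#include <glib.h>

typedef struct {
    char *property;
    int   new_value;
} PendingChange;

/* Step 1: unpack the message and only record what changed; no signals yet. */
static void
record_change(GQueue *pending, const char *property, int value)
{
    PendingChange *c = g_new(PendingChange, 1);

    c->property  = g_strdup(property);
    c->new_value = value;
    g_queue_push_tail(pending, c);
}

/* Steps 2 and 3: apply every recorded change to the cache, then notify.
 * "cache" is assumed to be a g_str_hash table that owns its keys. */
static void
commit_changes(GQueue *pending, GHashTable *cache)
{
    g_autoptr(GPtrArray) to_notify = g_ptr_array_new_with_free_func(g_free);
    PendingChange       *c;
    guint                i;

    while ((c = g_queue_pop_head(pending))) {
        g_hash_table_insert(cache, g_strdup(c->property), GINT_TO_POINTER(c->new_value));
        g_ptr_array_add(to_notify, c->property);
        g_free(c);
    }

    /* Only now is the cache consistent; emit all collected notifications. */
    for (i = 0; i < to_notify->len; i++)
        g_print("notify: %s\n", (const char *) g_ptr_array_index(to_notify, i));
}
```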
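Sketch 4: the kind of compile-time check such metadata macros can carry.
This is a hypothetical macro, not the real NML_DBUS_META_PROPERTY_INIT_U();
a field whose size does not match the D-Bus "u" (uint32) signature gives
the char[] a negative size, so the build fails:
```c
#include <glib.h>

typedef struct {
    const char *dbus_type;
    gsize       offset;
    int         check;
} MetaProperty;

typedef struct {
    guint32 version_id;
} ExampleData;

#define META_PROPERTY_INIT_U(ctype, field)                                         \
    {                                                                              \
        .dbus_type = "u",                                                          \
        .offset    = G_STRUCT_OFFSET(ctype, field),                                \
        /* fails to compile unless the field has the size of a guint32: */         \
        .check     = sizeof(char[(sizeof(((ctype *) 0)->field) == sizeof(guint32)) \
                                     ? 1                                           \
                                     : -1]),                                       \
    }

static const MetaProperty example_meta = META_PROPERTY_INIT_U(ExampleData, version_id);
```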
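Sketch 5: checking the D-Bus type before demarshalling, using GLib's
g_variant_is_of_type(). A minimal sketch of the idea; the helper name is
made up:
```c
#include <glib.h>

static gboolean
demarshal_uint_checked(GVariant *value, guint32 *out_val)
{
    /* Don't trust the sender: verify the type the metadata expects. */
    if (!g_variant_is_of_type(value, G_VARIANT_TYPE_UINT32))
        return FALSE; /* wrong type on D-Bus; log and ignore it */

    *out_val = g_variant_get_uint32(value);
    return TRUE;
}
```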
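Sketch 6: profile visibility from the client's perspective. This small
program uses only public API (nm_client_new(), nm_client_get_connections());
profiles hidden via "connection.permissions" simply never show up in the
returned list:
```c
#include <NetworkManager.h>

int
main(void)
{
    GError          *error = NULL;
    NMClient        *client;
    const GPtrArray *connections;
    guint            i;

    client = nm_client_new(NULL, &error);
    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_clear_error(&error);
        return 1;
    }

    /* Invisible profiles are filtered out of this list by NMClient. */
    connections = nm_client_get_connections(client);
    for (i = 0; i < connections->len; i++) {
        NMRemoteConnection *rc = g_ptr_array_index(connections, i);

        g_print("%s\n", nm_connection_get_id(NM_CONNECTION(rc)));
    }

    g_object_unref(client);
    return 0;
}
```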
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_setup
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
        /* handling of an "InterfacesRemoved" signal; collected changes
         * are committed at the "out" label below */
        log_context = "interfaces-removed";
        changed     = _dbus_handle_interface_removed(self, log_context, object_path, NULL, interfaces);
        goto out;
    }

    return;

out:
    if (changed)
        _dbus_handle_changes(self, log_context, TRUE);
}
static void
_dbus_properties_changed_cb(GDBusConnection *connection,
                            const char      *sender_name,
                            const char      *object_path,
                            const char      *signal_interface_name,
                            const char      *signal_name,
                            GVariant        *parameters,
                            gpointer         user_data)
{
    NMClient        *self = user_data;
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
    const char      *interface_name;

    gs_unref_variant GVariant *changed_properties     = NULL;
    gs_free const char       **invalidated_properties = NULL;
    const char                *log_context             = "properties-changed";
    if (priv->get_managed_objects_cancellable) {
        /* we still wait for the initial GetManagedObjects(). Ignore the event. */
        return;
    }

    if (!g_variant_is_of_type(parameters, G_VARIANT_TYPE("(sa{sv}as)")))
        return;
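The early type check matters because g_variant_get() aborts on a type mismatch; validating with g_variant_is_of_type() first turns a malformed or unexpected message into a silent no-op instead. For illustration, a matching "(sa{sv}as)" payload could be constructed like this, e.g. to exercise the handler in a test (all names here are illustrative):

```
/* Sketch only: building a "(sa{sv}as)" tuple of the shape that
 * PropertiesChanged carries. Returns a floating reference. */
static GVariant *
build_properties_changed_payload(void)
{
    GVariantBuilder props;
    const char     *no_invalidated[] = {NULL};

    g_variant_builder_init(&props, G_VARIANT_TYPE("a{sv}"));
    g_variant_builder_add(&props, "{sv}", "Version", g_variant_new_string("1.0"));

    return g_variant_new("(sa{sv}^as)",
                         "org.freedesktop.NetworkManager",
                         &props,
                         no_invalidated);
}
```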
    g_variant_get(parameters,
                  "(&s@a{sv}^a&s)",
                  &interface_name,
                  &changed_properties,
                  &invalidated_properties);
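The format string is chosen to avoid copies: "&s" borrows the interface name from the message, "@a{sv}" takes a reference to the dictionary without unpacking it, and "^a&s" allocates only the outer array while borrowing the strings, which is why a plain gs_free (a single g_free()) suffices for invalidated_properties. A sketch of walking such a dictionary (the helper name is illustrative):

```
/* Sketch only: iterating an "a{sv}" changed-properties dictionary.
 * "{&sv}" borrows the key; the "v" value is a new reference and
 * must be unreffed. */
static void
dump_changed_properties(GVariant *changed_properties)
{
    GVariantIter iter;
    const char  *key;
    GVariant    *value;

    g_variant_iter_init(&iter, changed_properties);
    while (g_variant_iter_next(&iter, "{&sv}", &key, &value)) {
        char *s = g_variant_print(value, TRUE);

        g_print("property %s changed to %s\n", key, s);
        g_free(s);
        g_variant_unref(value);
    }
}
```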
    if (invalidated_properties && invalidated_properties[0]) {
        NML_NMCLIENT_LOG_W(self,
                           "%s: [%s] ignore invalidated properties on interface %s",
                           log_context,
                           object_path,
                           interface_name);
    }
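invalidated_properties lists names whose new values were omitted from the signal; per standard D-Bus Properties semantics, a client that wanted those values would have to issue a fresh Get/GetAll. This handler deliberately only warns. For reference, the "^a&s"-unpacked array is NULL-terminated and can be walked like any strv (helper name illustrative):

```
/* Sketch only: the array from "^a&s" is NULL-terminated; the strings
 * are borrowed from the message, only the array itself is allocated. */
static void
log_invalidated(const char **invalidated_properties)
{
    gsize i;

    for (i = 0; invalidated_properties && invalidated_properties[i]; i++)
        g_print("invalidated: %s\n", invalidated_properties[i]);
}
```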
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    if (_dbus_handle_properties_changed(self,
                                        log_context,
                                        object_path,
                                        interface_name,
                                        FALSE,
                                        changed_properties,
                                        NULL))
        _dbus_handle_changes(self, log_context, TRUE);
}
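For orientation (this example is not code from nm-client.c): the handler above
is ultimately fed by the standard org.freedesktop.DBus.Properties.PropertiesChanged
signal, whose payload is (s interface_name, a{sv} changed_properties,
as invalidated_properties). A minimal, self-contained sketch of subscribing to
that raw signal with GDBusConnection, with an illustrative handler, could look
like this (link against gio-2.0):
```
#include <gio/gio.h>

/* Hypothetical handler: prints which interface changed how many properties. */
static void
on_properties_changed(GDBusConnection *connection,
                      const char      *sender_name,
                      const char      *object_path,
                      const char      *interface_name,
                      const char      *signal_name,
                      GVariant        *parameters,
                      gpointer         user_data)
{
    const char *prop_iface;
    GVariant   *changed;
    GVariant   *invalidated;

    /* Payload of PropertiesChanged: (s a{sv} as). */
    g_variant_get(parameters, "(&s@a{sv}@as)", &prop_iface, &changed, &invalidated);
    g_print("%s: %u properties changed on %s\n",
            object_path, (guint) g_variant_n_children(changed), prop_iface);
    g_variant_unref(changed);
    g_variant_unref(invalidated);
}

int
main(void)
{
    GMainLoop       *loop = g_main_loop_new(NULL, FALSE);
    GDBusConnection *bus  = g_bus_get_sync(G_BUS_TYPE_SYSTEM, NULL, NULL);

    if (!bus)
        return 1;

    /* Match PropertiesChanged from NetworkManager on any object path. */
    g_dbus_connection_signal_subscribe(bus,
                                       "org.freedesktop.NetworkManager",
                                       "org.freedesktop.DBus.Properties",
                                       "PropertiesChanged",
                                       NULL, /* any object path */
                                       NULL, /* no arg0 filter */
                                       G_DBUS_SIGNAL_FLAGS_NONE,
                                       on_properties_changed,
                                       NULL,
                                       NULL);

    g_main_loop_run(loop);
    return 0;
}
```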
libnm: fix leaking internal GMainContext for synchronously initialized NMClient
NMClient makes asynchronous D-Bus calls via g_dbus_connection_call().
This references the current GMainContext to later invoke the
asynchronous callback. Even when we cancel the asynchronous call,
the callback will still be invoked (later) to complete the request.
In particular this means when we destroy (unref) an NMClient, there
are quite possibly pending requests in the GMainContext. Although they
are cancelled, they keep the GMainContext alive.
With synchronous initialization, we have an internal GMainContext.
When we destroy the NMClient, we cannot just unhook the integrated
source; instead, we need to keep it integrated in the caller's main
context, as long as there are pending requests.
Add a mechanism to track those pending requests and fix the leak for the
internal GMainContext. Also expose the same mechanism to the user via a new
API called nm_client_get_context_busy_watcher(). This allows the user
to know when it can stop iterating the main context and when all
resources are reclaimed.
For example, the following Python snippet will lead to a crash:
    for i in range(1, 2000):
        nmc = NM.Client.new(None)
This creates a number of NMClient instances and destroys them again.
Note that here the GMainContext is never iterated, because
synchronous initialization does not iterate the caller's context. So,
while we correctly unref and dispose the created NMClient instances,
there are pending requests left in the inner GMainContext. These pile
up and soon the program will crash because it runs out of file descriptors.
We can have a similar problem with asynchronous initialization, when
we create a new GMainContext per client, and don't iterate it after
we are done with the client.
Note that this patch does not avoid the problem in general. The problem
cannot be avoided; the user must iterate the main context at some point.
Otherwise resources (memory and file descriptors) will be leaked.
Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/merge_requests/347
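A hypothetical usage sketch of that new API (a standalone program, not part of
this commit, assuming synchronous initialization on the default main context):
take a weak reference on the context busy watcher, drop the client, and keep
iterating the context until the watcher is disposed, at which point all
pending requests have been reclaimed.
```
#include <NetworkManager.h>

/* GWeakNotify callback: flag that the busy watcher has been disposed. */
static void
on_watcher_gone(gpointer data, GObject *where_the_object_was)
{
    *((gboolean *) data) = TRUE;
}

int
main(void)
{
    NMClient *client;
    gboolean  watcher_gone = FALSE;

    client = nm_client_new(NULL, NULL); /* synchronous initialization */
    if (!client)
        return 1;

    g_object_weak_ref(nm_client_get_context_busy_watcher(client),
                      on_watcher_gone,
                      &watcher_gone);

    /* ... use the client ... */

    g_object_unref(client);

    /* Cancelled requests may still be pending; iterate the context until
     * the busy watcher goes away and all resources are reclaimed. */
    while (!watcher_gone)
        g_main_context_iteration(NULL, TRUE);

    return 0;
}
```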
static void
_dbus_get_managed_objects_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    NMClient                  *self;
    NMClientPrivate           *priv;
    gs_unref_variant GVariant *ret                  = NULL;
    gs_unref_variant GVariant *managed_objects      = NULL;
    gs_free_error GError      *error                = NULL;
    gs_unref_object GObject   *context_busy_watcher = NULL;
    nm_utils_user_data_unpack(user_data, &self, &context_busy_watcher);

    ret = g_dbus_connection_call_finish(G_DBUS_CONNECTION(source), result, &error);

    nm_assert((!!ret) != (!!error));
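The callback above finishes the ObjectManager GetManagedObjects() call; the
assertion encodes the usual GIO finish-function contract that exactly one of
the return value and the error is set. As a rough, self-contained sketch of
the call side of such a request in plain GIO (illustrative only; NetworkManager
exposes org.freedesktop.DBus.ObjectManager at /org/freedesktop, and the reply
carries the standard a{oa{sa{sv}}} dictionary of object paths to interfaces to
properties):
```
#include <gio/gio.h>

static void
got_managed_objects(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GMainLoop *loop  = user_data;
    GError    *error = NULL;
    GVariant  *ret;

    /* Same contract the assertion above checks: exactly one of the
     * return value and the error is set. */
    ret = g_dbus_connection_call_finish(G_DBUS_CONNECTION(source), result, &error);
    if (!ret) {
        g_printerr("GetManagedObjects failed: %s\n", error->message);
        g_error_free(error);
    } else {
        g_print("reply has type %s\n", g_variant_get_type_string(ret));
        g_variant_unref(ret);
    }
    g_main_loop_quit(loop);
}

int
main(void)
{
    GMainLoop       *loop = g_main_loop_new(NULL, FALSE);
    GDBusConnection *bus  = g_bus_get_sync(G_BUS_TYPE_SYSTEM, NULL, NULL);

    if (!bus)
        return 1;

    g_dbus_connection_call(bus,
                           "org.freedesktop.NetworkManager",
                           "/org/freedesktop",
                           "org.freedesktop.DBus.ObjectManager",
                           "GetManagedObjects",
                           NULL,
                           G_VARIANT_TYPE("(a{oa{sa{sv}}})"),
                           G_DBUS_CALL_FLAGS_NONE,
                           -1,
                           NULL,
                           got_managed_objects,
                           loop);

    g_main_loop_run(loop);
    g_object_unref(bus);
    g_main_loop_unref(loop);
    return 0;
}
```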
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data are static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them, because mismatches won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type; it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, they prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change), so these are not ordinary assertions that indicate a bug
in libnm; they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. Those should be fixed first.
- Note that this changes the order in which we emit "notify::devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify::devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
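To make the three-step split concrete, here is a minimal sketch of the pattern.
It is an illustration only: the Cache type, its fields, and the function bodies
are invented stand-ins, not libnm's actual implementation; only the three-phase
structure mirrors the _dbus_handle_*() functions named above.
```
#include <glib.h>

/* Hypothetical, simplified stand-in for NMClient's internal bookkeeping. */
typedef struct {
    GPtrArray *pending_changes; /* unpacked from D-Bus, not yet applied */
    GPtrArray *pending_notify;  /* property names to announce once consistent */
} Cache;

/* Step 1: only unpack the D-Bus message and record what changed. */
static void
unpack_properties_changed(Cache *cache, const char *property)
{
    g_ptr_array_add(cache->pending_changes, g_strdup(property));
}

/* Step 2: apply every recorded change to the GObject instances,
 * still without emitting any signal. */
static void
apply_obj_changes(Cache *cache)
{
    guint i;

    for (i = 0; i < cache->pending_changes->len; i++) {
        const char *property = cache->pending_changes->pdata[i];

        /* ... update the corresponding GObject property here ... */
        g_ptr_array_add(cache->pending_notify, g_strdup(property));
    }
    g_ptr_array_set_size(cache->pending_changes, 0);
}

/* Step 3: the cache is now consistent; emit everything collected. */
static void
commit_changes(Cache *cache)
{
    guint i;

    for (i = 0; i < cache->pending_notify->len; i++)
        g_print("notify: %s\n", (const char *) cache->pending_notify->pdata[i]);
    g_ptr_array_set_size(cache->pending_notify, 0);
}

int
main(void)
{
    Cache cache = {
        .pending_changes = g_ptr_array_new_with_free_func(g_free),
        .pending_notify  = g_ptr_array_new_with_free_func(g_free),
    };

    /* A batched reply (like GetManagedObjects()) may carry many changes:
     * all state is applied first, then all signals are emitted. */
    unpack_properties_changed(&cache, "state");
    unpack_properties_changed(&cache, "devices");
    apply_obj_changes(&cache);
    commit_changes(&cache);

    g_ptr_array_unref(cache.pending_changes);
    g_ptr_array_unref(cache.pending_notify);
    return 0;
}
```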
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_setup
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
if (!ret && nm_utils_error_is_cancelled(error))
    return;
priv = NM_CLIENT_GET_PRIVATE(self);
libnm: fix leaking internal GMainContext for synchronously initialized NMClient
NMClient makes asynchronous D-Bus calls via g_dbus_connection_call().
This references the current GMainContext to later invoke the
asynchronous callback. Even when we cancel the asynchronous call,
the callback will still be invoked (later) to complete the request.
In particular this means when we destroy (unref) an NMClient, there
are quite possibly pending requests in the GMainContext. Although they
are cancelled, they keep the GMainContext alive.
With synchronous initialization, we have an internal GMainContext.
When we destroy the NMClient, we cannot just unhook the integrated
source; instead, we need to keep it integrated in the caller's main
context as long as there are pending requests.
Add a mechanism to track those pending requests and fix the leak for the
internal GMainContext. Also expose the same mechanism to the user via a new
API called nm_client_get_context_busy_watcher(). This allows the user
to know when it can stop iterating the main context and when all
resources are reclaimed (see the sketch below).
For example, the following will lead to a crash:
    for i in range(1, 2000):
        nmc = NM.Client.new(None)
This creates a number of NMClient instances and destroys them again.
Note that here the GMainContext is never iterated, because
synchronous initialization does not iterate the caller's context. So,
while we correctly unref and dispose the created NMClient instances,
there are pending requests left in the inner GMainContext. These pile
up and soon the program will crash because it runs out of file descriptors.
We can have a similar problem with asynchronous initialization, when
we create a new GMainContext per client, and don't iterate it after
we are done with the client.
Note that this patch does not avoid the problem in general. The problem
cannot be avoided: the user must iterate the main context at some point.
Otherwise, resources (memory and file descriptors) will be leaked.
Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/merge_requests/347
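As a usage sketch for this API (my illustration, not part of the patch; error
handling is omitted, and it assumes the client was created synchronously on the
default main context): take a weak reference on the busy watcher, drop the
client, and keep iterating the context until the watcher is finalized, at which
point no sources of the client linger in the context.
```
#include <NetworkManager.h>

static void
busy_watcher_gone(gpointer data, GObject *where_the_object_was)
{
    *((gboolean *) data) = TRUE;
}

int
main(void)
{
    NMClient *client = nm_client_new(NULL, NULL);
    gboolean  done   = FALSE;

    g_object_weak_ref(nm_client_get_context_busy_watcher(client),
                      busy_watcher_gone,
                      &done);
    g_object_unref(client);

    /* The client is destroyed, but cancelled D-Bus calls may still be
     * pending. Iterate the main context until the busy watcher goes away. */
    while (!done)
        g_main_context_iteration(NULL, TRUE);
    return 0;
}
```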
if (ret) {
    nm_assert(g_variant_is_of_type(ret, G_VARIANT_TYPE("(a{oa{sa{sv}}})")));
    managed_objects = g_variant_get_child_value(ret, 0);
}
g_clear_object(&priv->get_managed_objects_cancellable);
if (!managed_objects) {
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
```
        NML_NMCLIENT_LOG_D(self, "GetManagedObjects() call failed: %s", error->message);
        /* hm, now that's odd. Maybe NetworkManager just quit and we are about to get
         * a name-owner changed signal soon. Treat this as if we got no managed objects at all. */
    } else
        NML_NMCLIENT_LOG_D(self, "GetManagedObjects() completed");
```
libnm: fix leaking internal GMainContext for synchronously initialized NMClient
NMClient makes asynchronous D-Bus calls via g_dbus_connection_call().
This references the current GMainContext to later invoke the
asynchronous callback. Even when we cancel the asynchronous call,
the callback will still be invoked (later) to complete the request.
In particular this means when we destroy (unref) an NMClient, there
are quite possibly pending requests in the GMainContext. Although they
are cancelled, they keep the GMainContext alive.
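For illustration, the GIO behavior this relies on, as a hedged sketch (call_done() is a placeholder, not code from this patch): even after the call's GCancellable is triggered, the callback still runs, with G_IO_ERROR_CANCELLED, on a later context iteration, and until then the request keeps the context referenced.
```
#include <gio/gio.h>

/* Placeholder callback: it is invoked even if the call was cancelled. */
static void
call_done(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError   *error = NULL;
    GVariant *ret;

    ret = g_dbus_connection_call_finish(G_DBUS_CONNECTION(source), result, &error);
    if (!ret) {
        /* a cancelled call completes here with G_IO_ERROR_CANCELLED */
        g_debug("call completed with error: %s", error->message);
        g_clear_error(&error);
        return;
    }
    g_variant_unref(ret);
}
```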
With synchronous initialization, we have an internal GMainContext.
When we destroy the NMClient, we cannot just unhook the integrated
source; instead, we need to keep it integrated in the caller's main
context as long as there are pending requests.
Add a mechanism to track those pending requests and fix the leak for the
internal GMainContext. Also expose the same mechanism to the user via a new
API called nm_client_get_context_busy_watcher(). This allows the user
to know when they can stop iterating the main context and when all
resources are reclaimed.
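For illustration, a minimal sketch of the intended usage (dispose_client() and watcher_gone() are hypothetical helpers; nm_client_get_main_context() and nm_client_get_context_busy_watcher() are assumed from the libnm API of this series):
```
#include <NetworkManager.h>

static void
watcher_gone(gpointer user_data, GObject *where_the_object_was)
{
    /* the busy watcher was finalized: no pending request is left */
    *((gboolean *) user_data) = TRUE;
}

/* Hypothetical helper: drop @client and keep iterating its context
 * until all pending (possibly cancelled) D-Bus requests are reaped. */
static void
dispose_client(NMClient *client)
{
    GMainContext *context = g_main_context_ref(nm_client_get_main_context(client));
    gboolean      done    = FALSE;

    g_object_weak_ref(nm_client_get_context_busy_watcher(client), watcher_gone, &done);
    g_object_unref(client);

    while (!done)
        g_main_context_iteration(context, TRUE);

    g_main_context_unref(context);
}
```
The same pattern covers asynchronously initialized clients that were given their own GMainContext.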
For example the following will lead to a crash:
```
for i in range(1,2000):
    nmc = NM.Client.new(None)
```
This creates a number of NMClient instances and destroys them again.
Note that here the GMainContext is never iterated, because
synchronous initialization does not iterate the caller's context. So,
while we correctly unref and dispose the created NMClient instances,
there are pending requests left in the inner GMainContext. These pile
up and soon the program will crash because it runs out of file descriptors.
We can have a similar problem with asynchronous initialization, when
we create a new GMainContext per client, and don't iterate it after
we are done with the client.
Note that this patch does not avoid the problem in general. The problem
cannot be avoided: the user must iterate the main context at some point.
Otherwise resources (memory and file descriptors) will be leaked.
Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/merge_requests/347
```
    if (managed_objects) {
        GVariantIter iter;
        const char  *object_path;
        GVariant    *ifaces_tmp;

        g_variant_iter_init(&iter, managed_objects);
        while (g_variant_iter_next(&iter, "{&o@a{sa{sv}}}", &object_path, &ifaces_tmp)) {
            gs_unref_variant GVariant *ifaces = ifaces_tmp;

            _dbus_handle_interface_added(self, "get-managed-objects", object_path, ifaces);
        }
    }

    /* always call _dbus_handle_changes(), even if nothing changed. We need this to complete
     * initialization. */
    _dbus_handle_changes(self, "get-managed-objects", TRUE);
}

/*****************************************************************************/

static void
_nm_client_get_settings_call_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    NMRemoteConnection *remote_connection;
    NMClient           *self;
```
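For context, the reply that the loop above unpacks is a standard org.freedesktop.DBus.ObjectManager GetManagedObjects() reply of D-Bus type "a{oa{sa{sv}}}": object path to interfaces, each interface to its properties. A hedged sketch of issuing such a call with plain GDBusConnection (fetch_managed_objects() is a hypothetical helper, not the call site in nm-client.c):
```
#include <gio/gio.h>

/* Hypothetical helper: ask NetworkManager's ObjectManager (exported at
 * /org/freedesktop) for all managed objects. */
static void
fetch_managed_objects(GDBusConnection    *dbus_connection,
                      GCancellable       *cancellable,
                      GAsyncReadyCallback callback,
                      gpointer            user_data)
{
    g_dbus_connection_call(dbus_connection,
                           "org.freedesktop.NetworkManager",
                           "/org/freedesktop",
                           "org.freedesktop.DBus.ObjectManager",
                           "GetManagedObjects",
                           NULL,
                           G_VARIANT_TYPE("(a{oa{sa{sv}}})"),
                           G_DBUS_CALL_FLAGS_NONE,
                           -1,
                           cancellable,
                           callback,
                           user_data);
}
```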
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also, before, we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other metadata are static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them because wrong usage won't compile.
- The metadata now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to a user by setting
"connection.permissions". Such profiles are hidden by NMClient: they are
not returned by nm_client_get_connections() and emit no
"connection-added"/"connection-removed" signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
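As an illustration of the GMainContext contract described above, here is a
minimal sketch (not part of this patch) that drives an NMClient entirely from
a private main context on a worker thread. It assumes only the public
nm_client_new_async()/nm_client_new_finish() API and plain GLib; error
handling is reduced to a message.
```
/* Minimal sketch, not part of this patch: create and use an NMClient from a
 * worker thread that iterates its own GMainContext. */
#include <NetworkManager.h>

static void
client_ready_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GMainLoop *loop  = user_data;
    GError    *error = NULL;
    NMClient  *client;

    client = nm_client_new_finish(result, &error);
    if (!client) {
        g_printerr("could not create NMClient: %s\n", error->message);
        g_clear_error(&error);
    } else {
        g_print("NetworkManager version: %s\n", nm_client_get_version(client));
        g_object_unref(client);
    }
    g_main_loop_quit(loop);
}

static gpointer
worker_thread(gpointer user_data)
{
    GMainContext *context = g_main_context_new();
    GMainLoop    *loop;

    /* NMClient sticks to the thread-default main context at construction;
     * afterwards, all its callbacks fire while this thread iterates it. */
    g_main_context_push_thread_default(context);
    loop = g_main_loop_new(context, FALSE);
    nm_client_new_async(NULL, client_ready_cb, loop);
    g_main_loop_run(loop);
    g_main_loop_unref(loop);
    g_main_context_pop_thread_default(context);
    g_main_context_unref(context);
    return NULL;
}
```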
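A second, deliberately hypothetical sketch shows the unpack/update/commit
split. None of these names exist in libnm; it only illustrates the pattern of
mutating the whole cache first and emitting the collected notifications
afterwards, so observers always see a consistent state.
```
/* Hypothetical sketch (no such API in libnm): apply a whole batch of changes
 * before emitting any notification. */
#include <glib.h>

typedef struct {
    GHashTable *properties;     /* name -> value, the cached state */
    GPtrArray  *pending_notify; /* names whose change is not yet signalled */
} Cache;

static void
cache_apply(Cache *cache, const char *name, const char *value)
{
    /* Steps 1+2: update the cache, but only *record* the notification. */
    g_hash_table_insert(cache->properties, g_strdup(name), g_strdup(value));
    g_ptr_array_add(cache->pending_notify, g_strdup(name));
}

static void
cache_commit(Cache *cache)
{
    guint i;

    /* Step 3: the cache is consistent now; "emit" everything at once. */
    for (i = 0; i < cache->pending_notify->len; i++)
        g_print("notify: %s\n", (const char *) cache->pending_notify->pdata[i]);
    g_ptr_array_set_size(cache->pending_notify, 0);
}

int
main(void)
{
    Cache cache = {
        .properties     = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, g_free),
        .pending_notify = g_ptr_array_new_with_free_func(g_free),
    };

    cache_apply(&cache, "version", "1.30");
    cache_apply(&cache, "state", "connected-global");
    cache_commit(&cache); /* both notifications, emitted on a consistent state */

    g_ptr_array_unref(cache.pending_notify);
    g_hash_table_unref(cache.properties);
    return 0;
}
```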
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor

static void
_nm_client_get_settings_call_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    NMRemoteConnection *remote_connection;
    NMClient           *self;
    gs_unref_variant GVariant *ret   = NULL;
    gs_free_error GError      *error = NULL;
    gs_unref_variant GVariant *settings = NULL;
    NMLDBusObject             *dbobj;

    /* If the call was cancelled, the objects it referred to may already be
     * gone; bail out without touching user_data. */
    ret = g_dbus_connection_call_finish(G_DBUS_CONNECTION(source), result, &error);
    if (!ret && nm_utils_error_is_cancelled(error))
        return;

    remote_connection = user_data;
    self              = _nm_object_get_client(remote_connection);
    dbobj             = _nm_object_get_dbobj(remote_connection);
    _ASSERT_dbobj(dbobj, self);

    if (!ret) {
        NML_NMCLIENT_LOG_T(self,
                           "[%s] GetSettings() completed with error: %s",
                           dbobj->dbus_path->str,
                           error->message);
    } else {
        NML_NMCLIENT_LOG_T(self,
                           "[%s] GetSettings() completed with success",
                           dbobj->dbus_path->str);
        g_variant_get(ret, "(@a{sa{sv}})", &settings);
    }

    /* First record the (possibly NULL) settings on the NMRemoteConnection,
     * then emit all collected signals in one commit step. */
    _nm_remote_settings_get_settings_commit(remote_connection, settings);
    _dbus_handle_changes_commit(self, TRUE);
}

void
_nm_client_get_settings_call(NMClient *self, NMLDBusObject *dbobj)
{
    GCancellable *cancellable;

    cancellable = _nm_remote_settings_get_settings_prepare(NM_REMOTE_CONNECTION(dbobj->nmobj));

    /* Issue GetSettings() on the D-Bus connection directly; the reply of
     * type (a{sa{sv}}) is handled by _nm_client_get_settings_call_cb(). */
    _nm_client_dbus_call_simple(self,
                                cancellable,
                                dbobj->dbus_path->str,
                                NM_DBUS_INTERFACE_SETTINGS_CONNECTION,
                                "GetSettings",
                                g_variant_new("()"),
                                G_VARIANT_TYPE("(a{sa{sv}})"),
                                G_DBUS_CALL_FLAGS_NONE,
                                NM_DBUS_DEFAULT_TIMEOUT_MSEC,
                                _nm_client_get_settings_call_cb,
                                dbobj->nmobj);
}

static void
_dbus_settings_updated_cb(GDBusConnection *connection,
                          const char      *sender_name,
                          const char      *object_path,
                          const char      *signal_interface_name,
                          const char      *signal_name,
                          GVariant        *parameters,
                          gpointer user_data)
{
    NMClient *self = user_data;
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
    const char *log_context = "settings-updated";
    NMLDBusObject *dbobj;

    if (priv->get_managed_objects_cancellable) {
        /* We are still waiting for the initial GetManagedObjects() reply.
         * Ignore the event. */
        return;
    }

    if (!g_variant_is_of_type(parameters, G_VARIANT_TYPE("()")))
        return;

    dbobj = _dbobjs_dbobj_get_s(self, object_path);
    if (!dbobj || !NM_IS_REMOTE_CONNECTION(dbobj->nmobj)) {
        NML_NMCLIENT_LOG_W(self,
                           "%s: [%s] ignore Updated signal for non-existing setting",
                           log_context,
                           object_path);
        return;
    }

    NML_NMCLIENT_LOG_T(self, "%s: [%s] Updated signal received", log_context, object_path);

    _nm_client_get_settings_call(self, dbobj);
}

/*****************************************************************************/
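
/* Handler for the "StateChanged" D-Bus signal of active connections
 * (presumably org.freedesktop.NetworkManager.Connection.Active, as the name
 * suggests); like the handlers above, a plain GDBusConnection signal
 * callback. */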
static void
_dbus_nm_connection_active_state_changed_cb(GDBusConnection *connection,
                                            const char *sender_name,
                                            const char *object_path,
                                            const char *signal_interface_name,
                                            const char *signal_name,
                                            GVariant *parameters,
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
                                            gpointer         user_data)
{
    NMClient        *self        = user_data;
    NMClientPrivate *priv        = NM_CLIENT_GET_PRIVATE(self);
    const char      *log_context = "active-connection-state-changed";
    NMLDBusObject   *dbobj;
    guint32          state;
    guint32          reason;

    if (priv->get_managed_objects_cancellable) {
        /* we still wait for the initial GetManagedObjects(). Ignore the event. */
        return;
    }

    if (!g_variant_is_of_type(parameters, G_VARIANT_TYPE("(uu)"))) {
        NML_NMCLIENT_LOG_E(self,
                           "%s: [%s] ignore StateChanged signal with unexpected signature",
                           log_context,
                           object_path);
        return;
    }

    dbobj = _dbobjs_dbobj_get_s(self, object_path);

    if (!dbobj || !NM_IS_ACTIVE_CONNECTION(dbobj->nmobj)) {
        NML_NMCLIENT_LOG_E(self,
                           "%s: [%s] ignore StateChanged signal for non-existing active connection",
                           log_context,
                           object_path);
        return;
    }

    g_variant_get(parameters, "(uu)", &state, &reason);

    NML_NMCLIENT_LOG_T(self, "%s: [%s] StateChanged signal received", log_context, object_path);

    _nm_active_connection_state_changed_commit(NM_ACTIVE_CONNECTION(dbobj->nmobj), state, reason);

    _dbus_handle_changes_commit(self, TRUE);
}
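
/* Note: the handler above has exactly the GDBusSignalCallback shape, so it
 * can be registered with plain GIO. A minimal, hypothetical sketch of such a
 * subscription follows; the "priv->dbus_connection" field and the handler
 * name "_dbus_nm_active_connection_state_changed_cb" are assumptions for
 * illustration, not taken from the actual call site:
 *
 *   priv->ac_state_changed_id =
 *       g_dbus_connection_signal_subscribe(priv->dbus_connection,
 *                                          "org.freedesktop.NetworkManager",
 *                                          "org.freedesktop.NetworkManager.Connection.Active",
 *                                          "StateChanged",
 *                                          NULL,  (match any object path)
 *                                          NULL,  (no arg0 filter)
 *                                          G_DBUS_SIGNAL_FLAGS_NONE,
 *                                          _dbus_nm_active_connection_state_changed_cb,
 *                                          self,
 *                                          NULL);
 *
 * The subscription would later be released with
 * g_dbus_connection_signal_unsubscribe(priv->dbus_connection,
 * priv->ac_state_changed_id). Because the object path is not filtered here,
 * the handler itself must look up the NMLDBusObject for the path, as done
 * above with _dbobjs_dbobj_get_s(). */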
/*****************************************************************************/

static void
_dbus_nm_vpn_connection_state_changed_cb(GDBusConnection *connection,
                                         const char      *sender_name,
                                         const char      *object_path,
                                         const char      *signal_interface_name,
                                         const char      *signal_name,
                                         GVariant        *parameters,
                                         gpointer         user_data)
{
    NMClient *self = user_data;
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify:devices", and finally "device-added"
signals.
This changes the behavior introduced by commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden by NMClient's
nm_client_get_connections() and by the "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved (see the
third sketch after this list).
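To make the unpack / apply / commit split described above concrete, here is a
minimal, self-contained sketch (hypothetical types and function names, not the
actual libnm code): changes are first only recorded, then applied to all
instances, and only afterwards are the collected notifications emitted, so
observers always see a consistent cache.
```
/* Minimal sketch of the three-step change handling (hypothetical types and
 * function names; not the actual libnm implementation). */
#include <stdio.h>

#define MAX_CHANGES 16

typedef struct {
    const char *object;
    const char *property;
    int         new_value;
} PendingChange;

typedef struct {
    PendingChange changes[MAX_CHANGES];
    int           n_changes;
} Cache;

/* Step 1: unpack the D-Bus message and only record what changed. */
static void
handle_properties_changed(Cache *cache, const char *object, const char *property, int value)
{
    if (cache->n_changes < MAX_CHANGES)
        cache->changes[cache->n_changes++] = (PendingChange){object, property, value};
}

/* Step 2: apply all recorded changes to the instances; no signals yet. */
static void
handle_obj_changed(Cache *cache)
{
    int i;

    for (i = 0; i < cache->n_changes; i++)
        printf("apply  %s.%s = %d\n",
               cache->changes[i].object,
               cache->changes[i].property,
               cache->changes[i].new_value);
}

/* Step 3: emit all collected notifications, now that the cache is consistent. */
static void
handle_changes_commit(Cache *cache)
{
    int i;

    for (i = 0; i < cache->n_changes; i++)
        printf("notify %s.%s\n", cache->changes[i].object, cache->changes[i].property);
    cache->n_changes = 0;
}

int
main(void)
{
    Cache cache = {0};

    /* A reply carrying several changes at once: all of them are applied
     * before any notification goes out. */
    handle_properties_changed(&cache, "/org/example/Device/1", "State", 100);
    handle_properties_changed(&cache, "/org/example/Device/2", "State", 30);
    handle_obj_changed(&cache);
    handle_changes_commit(&cache);
    return 0;
}
```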
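As a second illustration, here is a hedged sketch of a compile-time
property-type check in the spirit of NML_DBUS_META_PROPERTY_INIT_U() (the
macro and types below are hypothetical, not the actual libnm definitions): a
_Generic() selection without a default branch makes the initializer fail to
compile unless the backing C field has the type that matches the D-Bus "u"
signature.
```
/* Hedged sketch of a compile-time type check (hypothetical macro, not the
 * actual NML_DBUS_META_PROPERTY_INIT_U() definition). */
#include <stddef.h>
#include <stdint.h>

typedef struct {
    const char *dbus_type; /* expected D-Bus signature, e.g. "u" */
    size_t      offset;    /* offset of the backing C field */
    size_t      check;     /* dummy; exists only for the compile-time check */
} MetaProperty;

/* _Generic() has no default branch here: this only compiles if "field"
 * really is a uint32_t, matching the D-Bus "u" type. */
#define META_PROPERTY_INIT_U(ctype, field)                      \
    {                                                           \
        .dbus_type = "u",                                       \
        .offset    = offsetof(ctype, field),                    \
        .check     = sizeof(char[_Generic(((ctype *) 0)->field, \
                                          uint32_t : 1)]),      \
    }

typedef struct {
    uint32_t state;
    char    *path;
} Device;

static const MetaProperty device_properties[] = {
    META_PROPERTY_INIT_U(Device, state),
    /* META_PROPERTY_INIT_U(Device, path) would fail to compile, because
     * "path" is not a uint32_t. */
};
```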
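And a third hedged sketch of the visibility rule above (the helper function is
hypothetical; the libnm functions are the ones named in this list): profiles
hidden via "connection.permissions" are absent from
nm_client_get_connections(), yet still reachable through
nm_active_connection_get_connection().
```
#include <NetworkManager.h>

/* Hedged sketch (hypothetical helper): where hidden profiles do and
 * don't show up. Assumes an initialized NMClient and an active
 * connection obtained from it. */
static void
show_visibility(NMClient *client, NMActiveConnection *ac)
{
    /* Does NOT contain profiles hidden via connection.permissions: */
    const GPtrArray *conns = nm_client_get_connections(client);

    /* May still hand out such a hidden profile: */
    NMRemoteConnection *rc = nm_active_connection_get_connection(ac);

    g_print("%u visible profiles; active profile: %s\n",
            conns->len,
            nm_connection_get_id(NM_CONNECTION(rc)));
}
```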
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor

    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
    const char *log_context = "vpn-connection-state-changed";
    NMLDBusObject *dbobj;
    guint32 state;
    guint32 reason;

    if (priv->get_managed_objects_cancellable) {
        /* We are still waiting for the initial GetManagedObjects() reply. Ignore the event. */
        return;
    }

    if (!g_variant_is_of_type(parameters, G_VARIANT_TYPE("(uu)"))) {
        NML_NMCLIENT_LOG_E(self,
                           "%s: [%s] ignore VpnStateChanged signal with unexpected signature",
                           log_context,
                           object_path);
        return;
    }

    dbobj = _dbobjs_dbobj_get_s(self, object_path);
    if (!dbobj || !NM_IS_VPN_CONNECTION(dbobj->nmobj)) {
        NML_NMCLIENT_LOG_E(self,
                           "%s: [%s] ignore VpnStateChanged signal for non-existing vpn connection",
                           log_context,
                           object_path);
        return;
    }

    g_variant_get(parameters, "(uu)", &state, &reason);

    NML_NMCLIENT_LOG_T(self, "%s: [%s] VpnStateChanged signal received", log_context, object_path);

    _nm_vpn_connection_state_changed_commit(NM_VPN_CONNECTION(dbobj->nmobj), state, reason);

    /* Flush the collected changes and emit the pending signals. */
    _dbus_handle_changes_commit(self, TRUE);
}
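/* Example (a hedged sketch, not part of this file): a consumer can observe the
 * state committed above through libnm's public NMVpnConnection
 * "vpn-state-changed" signal, which carries the same (state, reason) pair:
 *
 *     static void
 *     on_vpn_state_changed(NMVpnConnection *vpn, guint state, guint reason, gpointer user_data)
 *     {
 *         g_print("VPN state %u (reason %u)\n", state, reason);
 *     }
 *
 *     g_signal_connect(vpn, "vpn-state-changed", G_CALLBACK(on_vpn_state_changed), NULL);
 */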

/*****************************************************************************/

static void
_emit_permissions_changed(NMClient *self, const guint8 *old_permissions, const guint8 *permissions)
{
    int i;
    if (self->obj_base.is_disposing)
        return;

    if (old_permissions == permissions)
        return;

    for (i = 0; i < (int) G_N_ELEMENTS(nm_auth_permission_sorted); i++) {
        NMClientPermission       perm            = nm_auth_permission_sorted[i];
        NMClientPermissionResult perm_result     = NM_CLIENT_PERMISSION_RESULT_UNKNOWN;
        NMClientPermissionResult perm_result_old = NM_CLIENT_PERMISSION_RESULT_UNKNOWN;

        /* NMClientPermission values start at 1, hence the "perm - 1" indexing. */
        if (permissions)
            perm_result = permissions[perm - 1];
        if (old_permissions)
            perm_result_old = old_permissions[perm - 1];

        if (perm_result == perm_result_old)
            continue;
        g_signal_emit(self, signals[PERMISSION_CHANGED], 0, (guint) perm, (guint) perm_result);
    }
}
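/* Example (a hedged sketch, not part of this file): clients can watch the
 * PERMISSION_CHANGED notifications emitted above via NMClient's public
 * "permission-changed" signal, or query a single permission with
 * nm_client_get_permission_result():
 *
 *     static void
 *     on_permission_changed(NMClient *client, guint permission, guint result, gpointer user_data)
 *     {
 *         g_print("permission %u -> result %u\n", permission, result);
 *     }
 *
 *     g_signal_connect(client, "permission-changed", G_CALLBACK(on_permission_changed), NULL);
 */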
static void
_dbus_check_permissions_start_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    nm_auto_pop_gmaincontext GMainContext *dbus_context = NULL;
    NMClient *self;
    NMClientPrivate *priv;
    gs_unref_variant GVariant *ret = NULL;
    nm_auto_free_variant_iter GVariantIter *v_permissions = NULL;
    gs_free guint8 *old_permissions = NULL;
    gs_free_error GError *error = NULL;
    const char *pkey;
    const char *pvalue;
    int i;
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
|
|
|
|
|

    ret = g_dbus_connection_call_finish(G_DBUS_CONNECTION(source), result, &error);
    if (!ret && nm_utils_error_is_cancelled(error))
        return;

    self = user_data;
    priv = NM_CLIENT_GET_PRIVATE(self);

    g_clear_object(&priv->permissions_cancellable);

    old_permissions = g_steal_pointer(&priv->permissions);
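    /* The cache is rebuilt from the reply; on failure it stays empty, which
     * is treated as "all permissions unknown" (see below). */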

    if (!ret) {
        /* When the call completes we always pretend success: even a failure
         * means that we fetched the permissions; they are merely all unknown. */
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
---
        NML_NMCLIENT_LOG_T(self, "GetPermissions call failed: %s", error->message);
        goto out;
    }

    NML_NMCLIENT_LOG_T(self, "GetPermissions call finished with success");

    g_variant_get(ret, "(a{ss})", &v_permissions);

    while (g_variant_iter_next(v_permissions, "{&s&s}", &pkey, &pvalue)) {
        NMClientPermission       perm;
        NMClientPermissionResult perm_result;

        perm = nm_auth_permission_from_string(pkey);
        if (perm == NM_CLIENT_PERMISSION_NONE)
            continue;
        perm_result = nm_client_permission_result_from_string(pvalue);

        if (!priv->permissions) {
            if (perm_result == NM_CLIENT_PERMISSION_RESULT_UNKNOWN)
                continue;
            priv->permissions = g_new(guint8, G_N_ELEMENTS(nm_auth_permission_sorted));
            for (i = 0; i < (int) G_N_ELEMENTS(nm_auth_permission_sorted); i++)
                priv->permissions[i] = NM_CLIENT_PERMISSION_RESULT_UNKNOWN;
        }
        priv->permissions[perm - 1] = perm_result;
    }

out:
    priv->permissions_state = NM_TERNARY_TRUE;
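The loop above stores each result at priv->permissions[perm - 1]: index 0 holds
the first real permission, since NM_CLIENT_PERMISSION_NONE (0) is skipped.
Once the reply is processed, permissions_state records that the values were
fetched. As an illustration (not part of the blamed file), a consumer would
query the cached result roughly like this; nm_client_get_permission_result()
is answered from the cache filled above, without a further D-Bus round trip:
```c
#include <NetworkManager.h>

int
main(void)
{
    GError                  *error = NULL;
    NMClient                *client;
    NMClientPermissionResult result;

    /* Synchronous initialization; internally NMClient still populates
     * its cache (including GetPermissions) asynchronously. */
    client = nm_client_new(NULL, &error);
    if (!client) {
        g_printerr("could not create NMClient: %s\n", error->message);
        g_clear_error(&error);
        return 1;
    }

    /* Served from the permissions cache parsed by the code above. */
    result = nm_client_get_permission_result(client, NM_CLIENT_PERMISSION_NETWORK_CONTROL);
    g_print("network-control: %s\n",
            result == NM_CLIENT_PERMISSION_RESULT_YES ? "yes" : "no/unknown");

    g_object_unref(client);
    return 0;
}
```
---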
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectManagerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with a large number of D-Bus
objects. The patch tries hard to make the implementation scale well. Of
course, when we cache N objects that have references to each other,
we are still fundamentally O(N*M) in runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to an NMClient happens through the caller's
GMainContext. This follows glib's style and effectively allows using NMClient
in a multi-threaded scenario. This implies sticking to a main context
upon construction and ensuring that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context (see the sketch after this list).
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong: signals must be emitted in the right order,
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code around does not make it
easier to understand; on the contrary. That means NMClient is
inherently complex, as it contains most of the logic. I think that is
unavoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that, for example during the initial
GetManagedObjects() reply, NMClient receives a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted at a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties have already changed
and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from the client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what the NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should all be for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping the NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also, before, we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other metadata are static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them, because misuse won't compile.
- The metadata now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type; it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). These are therefore not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. Those should be fixed first.
- Note that this changes the order in which we emit "notify::devices" and
"device-added" signals. I think it makes the most sense to first emit
"device-removed", then "notify::devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and its "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
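The following is the sketch referenced in the GMainContext item above: a
minimal, self-contained illustration (not code from this patch) of the rule.
Instead of g_idle_add(), which always targets g_main_context_default(), a
source is created and attached to an explicit context, so the callback only
fires while that context is iterated:
```c
#include <glib.h>

static gboolean
on_idle(gpointer user_data)
{
    g_main_loop_quit(user_data);
    return G_SOURCE_REMOVE;
}

int
main(void)
{
    GMainContext *context = g_main_context_new();
    GMainLoop    *loop    = g_main_loop_new(context, FALSE);
    GSource      *source;

    /* What libnm must do instead of g_idle_add(): create the source
     * explicitly and attach it to the chosen context. */
    source = g_idle_source_new();
    g_source_set_callback(source, on_idle, loop, NULL);
    g_source_attach(source, context);
    g_source_unref(source);

    /* The callback runs only while this context is iterated. */
    g_main_loop_run(loop);

    g_main_loop_unref(loop);
    g_main_context_unref(context);
    return 0;
}
```
With g_idle_add(), the same source would land in g_main_context_default(), so
the callback could fire while some other thread iterates the default context.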
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle; that is a separate issue. Here we just check how long a plain
`nmcli` invocation takes on such a system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
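For reference, here is the permissions-refresh path in nm-client.c that this
rework reshaped: the tail of the GetPermissions() reply handling, followed by
_dbus_check_permissions_start(), which issues the D-Bus call.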
    dbus_context = nm_g_main_context_push_thread_default_if_necessary(priv->main_context);

    _emit_permissions_changed(self, old_permissions, priv->permissions);

    _notify(self, PROP_PERMISSIONS_STATE);
}

static void
_dbus_check_permissions_start(NMClient *self)
{
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
    gboolean         fetch;

    fetch = !NM_FLAGS_HAS((NMClientInstanceFlags) priv->instance_flags,
                          NM_CLIENT_INSTANCE_FLAGS_NO_AUTO_FETCH_PERMISSIONS);
    nm_clear_g_cancellable(&priv->permissions_cancellable);

    if (fetch) {
        NML_NMCLIENT_LOG_T(self, "GetPermissions() call started...");
        priv->permissions_cancellable = g_cancellable_new();
        _nm_client_dbus_call_simple(self,
                                    priv->permissions_cancellable,
                                    NM_DBUS_PATH,
                                    NM_DBUS_INTERFACE,
                                    "GetPermissions",
                                    g_variant_new("()"),
                                    G_VARIANT_TYPE("(a{ss})"),
                                    G_DBUS_CALL_FLAGS_NONE,
                                    NM_DBUS_DEFAULT_TIMEOUT_MSEC,
                                    _dbus_check_permissions_start_cb,
                                    self);
    }
}
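The automatic fetch can be suppressed through the instance flag checked
above. A minimal sketch, assuming a libnm version where the "instance-flags"
construct property (NM_CLIENT_INSTANCE_FLAGS) is available; the helper name
is made up, and synchronous g_initable_new() is used only for brevity.
```
#include <NetworkManager.h>

/* Create an NMClient that skips the automatic GetPermissions() call. */
static NMClient *
client_new_without_permissions(GError **error)
{
    return g_initable_new(NM_TYPE_CLIENT,
                          NULL, /* cancellable */
                          error,
                          NM_CLIENT_INSTANCE_FLAGS,
                          (guint) NM_CLIENT_INSTANCE_FLAGS_NO_AUTO_FETCH_PERMISSIONS,
                          NULL);
}
```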
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously, GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, they prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). They are therefore not ordinary assertions that indicate a
bug in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. Those should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify:devices", and finally "device-added"
(see the sketch below).
This changes behavior relative to commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
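A minimal sketch of observing that ordering from an application, assuming
only the public libnm API; the callback names are invented:
```
#include <NetworkManager.h>

static void
on_device_added(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("device-added: %s\n", nm_device_get_iface(device));
}

static void
on_device_removed(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("device-removed: %s\n", nm_device_get_iface(device));
}

static void
on_notify_devices(GObject *object, GParamSpec *pspec, gpointer user_data)
{
    /* With the new ordering this fires after "device-removed" and
     * before "device-added". */
    g_print("notify::devices\n");
}

int
main(void)
{
    GMainLoop *loop   = g_main_loop_new(NULL, FALSE);
    GError    *error  = NULL;
    NMClient  *client = nm_client_new(NULL, &error);

    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    g_signal_connect(client, "device-added", G_CALLBACK(on_device_added), NULL);
    g_signal_connect(client, "device-removed", G_CALLBACK(on_device_removed), NULL);
    g_signal_connect(client, "notify::devices", G_CALLBACK(on_notify_devices), NULL);

    g_main_loop_run(loop);

    g_object_unref(client);
    g_main_loop_unref(loop);
    return 0;
}
```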
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and from the "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved (see the
sketch below).
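A short sketch of both accessors, assuming only the public libnm API; it
lists the visible profiles and the (possibly hidden) profile behind each
active connection:
```
#include <NetworkManager.h>

int
main(void)
{
    GError          *error  = NULL;
    NMClient        *client = nm_client_new(NULL, &error);
    const GPtrArray *connections;
    const GPtrArray *active;
    guint            i;

    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    /* Profiles hidden via "connection.permissions" do not show up here. */
    connections = nm_client_get_connections(client);
    for (i = 0; i < connections->len; i++)
        g_print("visible profile: %s\n",
                nm_connection_get_id(NM_CONNECTION(connections->pdata[i])));

    /* ...but the profile behind an active connection is exposed even if
     * it is hidden. */
    active = nm_client_get_active_connections(client);
    for (i = 0; i < active->len; i++) {
        NMRemoteConnection *rc =
            nm_active_connection_get_connection(NM_ACTIVE_CONNECTION(active->pdata[i]));

        if (rc)
            g_print("active profile: %s\n", nm_connection_get_id(NM_CONNECTION(rc)));
    }

    g_object_unref(client);
    return 0;
}
```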
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
}

static void
_dbus_nm_check_permissions_cb(GDBusConnection *connection,
                              const char      *sender_name,
                              const char      *object_path,
                              const char      *signal_interface_name,
                              const char      *signal_name,
                              GVariant        *parameters,
                              gpointer         user_data)
{
    NMClient        *self = user_data;
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);

    if (!g_variant_is_of_type(parameters, G_VARIANT_TYPE("()"))) {
        NML_NMCLIENT_LOG_E(self,
                           "ignore CheckPermissions signal with unexpected signature %s",
                           g_variant_get_type_string(parameters));
        return;
    }

    /* The signal itself carries no data; it only tells us to re-fetch the
     * permissions. */
    _dbus_check_permissions_start(self);

    /* A previously up-to-date permissions state is stale until the re-fetch
     * completes. */
    if (priv->permissions_state == NM_TERNARY_TRUE)
        priv->permissions_state = NM_TERNARY_FALSE;
    _notify(self, PROP_PERMISSIONS_STATE);
}

/*****************************************************************************/

static void
_property_ao_notify_changed_connections_cb(NMLDBusPropertyAO *pr_ao,
                                           NMClient          *self,
                                           NMObject          *nmobj,
                                           gboolean           is_added /* or else removed */)
{
    _nm_client_notify_event_queue_emit_obj_signal(self,
                                                  G_OBJECT(self),
                                                  nmobj,
                                                  is_added,
                                                  5,
                                                  is_added ? signals[CONNECTION_ADDED]
                                                           : signals[CONNECTION_REMOVED]);
}
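/* The _property_ao_notify_changed_*_cb callbacks (above and below) all
 * follow the same pattern: they are the notify-changed hooks for NMClient's
 * "array of objects" (AO) D-Bus properties and run while the cache commits
 * a change. Each hook queues a per-object added/removed signal via
 * _nm_client_notify_event_queue_emit_obj_signal(); the integer argument is
 * evidently an ordering priority for the notify-event queue, which keeps
 * removed, notify:*, and added emissions in a well-defined order. */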
static void
_property_ao_notify_changed_all_devices_cb(NMLDBusPropertyAO *pr_ao,
                                           NMClient          *self,
                                           NMObject          *nmobj,
                                           gboolean           is_added /* or else removed */)
{
    _nm_client_notify_event_queue_emit_obj_signal(self,
                                                  G_OBJECT(self),
                                                  nmobj,
                                                  is_added,
                                                  6,
                                                  is_added ? signals[ANY_DEVICE_ADDED]
                                                           : signals[ANY_DEVICE_REMOVED]);
}
static void
_property_ao_notify_changed_devices_cb(NMLDBusPropertyAO *pr_ao,
                                       NMClient          *self,
                                       NMObject          *nmobj,
                                       gboolean           is_added /* or else removed */)
{
    _nm_client_notify_event_queue_emit_obj_signal(self,
                                                  G_OBJECT(self),
                                                  nmobj,
                                                  is_added,
                                                  7,
                                                  is_added ? signals[DEVICE_ADDED]
                                                           : signals[DEVICE_REMOVED]);
}
static void
_property_ao_notify_changed_active_connections_cb(NMLDBusPropertyAO *pr_ao,
                                                  NMClient          *self,
                                                  NMObject          *nmobj,
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
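    For reference, a minimal sketch of how such an RSS reading can be taken
    non-interactively (the 2-second settle delay is an arbitrary choice):
```
nmcli monitor >/dev/null &
NMCLI_PID=$!
sleep 2
ps -o rss= -p "$NMCLI_PID"   # resident set size in kbytes
kill "$NMCLI_PID"
```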
                                               gboolean is_added /* or else removed */)
{
    _nm_client_notify_event_queue_emit_obj_signal(self,
                                                  G_OBJECT(self),
                                                  nmobj,
                                                  is_added,
                                                  8,
                                                  is_added ? signals[ACTIVE_CONNECTION_ADDED]
                                                           : signals[ACTIVE_CONNECTION_REMOVED]);
}
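/* A consumer of these events would connect to the corresponding NMClient
 * signals. A minimal sketch, assuming the public signal name
 * "active-connection-added" (the callback name is illustrative):
 *
 *   static void
 *   on_ac_added(NMClient *client, NMActiveConnection *ac, gpointer user_data)
 *   {
 *       g_print("added: %s\n", nm_active_connection_get_id(ac));
 *   }
 *
 *   g_signal_connect(client, "active-connection-added",
 *                    G_CALLBACK(on_ac_added), NULL);
 */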

/*****************************************************************************/

/* Tracks one pending request that waits for an NMLDBusObject (and its
 * NMObject counterpart) to become ready. */
typedef struct {
    NMLDBusObjWatcherWithPtr *obj_watcher;
    const char               *op_name;
    NMLDBusObject            *dbobj;
    GTask                    *task;
    GVariant                 *extra_results;
    gpointer                  result;
    GType                     gtype;
    gulong                    cancellable_id;
} RequestWaitData;

static void
_request_wait_data_free(RequestWaitData *request_data)
{
    nm_assert(!request_data->obj_watcher);
    nm_assert(request_data->cancellable_id == 0);
    nm_assert(!request_data->task || G_IS_TASK(request_data->task));

    nm_g_object_unref(request_data->task);
    nm_g_object_unref(request_data->result);
    nm_g_variant_unref(request_data->extra_results);
    if (request_data->dbobj)
        nml_dbus_object_unref(request_data->dbobj);
    nm_g_slice_free(request_data);
}

static void
_request_wait_task_return(RequestWaitData *request_data)
{
    gs_unref_object GTask *task = NULL;

    nm_assert(request_data);
    nm_assert(G_IS_TASK(request_data->task));
    nm_assert(request_data->dbobj);
    nm_assert(NM_IS_OBJECT(request_data->dbobj->nmobj));
    nm_assert(!request_data->result);

    task = g_steal_pointer(&request_data->task);

    request_data->result = g_object_ref(request_data->dbobj->nmobj);
    nm_clear_g_signal_handler(g_task_get_cancellable(task), &request_data->cancellable_id);
    nm_clear_pointer(&request_data->dbobj, nml_dbus_object_unref);
    g_task_return_pointer(task, request_data, (GDestroyNotify) _request_wait_data_free);
}
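/* The task returned above is consumed on the caller side with the usual
 * GTask idiom. A minimal sketch of a _finish-style helper (the function
 * name and NULL-on-error contract are illustrative, not actual libnm API):
 *
 *   static gpointer
 *   request_wait_finish_sketch(NMClient *client, GAsyncResult *result, GError **error)
 *   {
 *       g_return_val_if_fail(g_task_is_valid(result, client), NULL);
 *       return g_task_propagate_pointer(G_TASK(result), error);
 *   }
 *
 * The propagated pointer is the RequestWaitData itself; its ->result field
 * holds a reference to the NMObject that became ready.
 */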

static gboolean
_request_wait_complete(NMClient *self, RequestWaitData *request_data, gboolean force_complete)
{
    NMLDBusObject *dbobj;

    nm_assert(request_data);
    nm_assert(!request_data->result);
    nm_assert(!request_data->obj_watcher);
    nm_assert(request_data->dbobj);

    dbobj = request_data->dbobj;
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
    on Fedora 31 generating wrong ELF notes. Usually libnm is smaller, but
    in these tests it had large (and bogus) ELF notes. Anyway, the point
    is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
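/* The code below is from _request_wait_complete() and related callbacks: it
 * completes a pending request once the awaited object becomes ready, or
 * fails the request when the object ends up in an unsuitable state. */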
    if (dbobj->obj_state == NML_DBUS_OBJ_STATE_WITH_NMOBJ_READY) {
        NML_NMCLIENT_LOG_D(self,
                           "%s() succeeded with %s",
                           request_data->op_name,
                           dbobj->dbus_path->str);
        nm_assert(G_TYPE_CHECK_INSTANCE_TYPE(dbobj->nmobj, request_data->gtype));
        _request_wait_task_return(request_data);
        return TRUE;
    }
    if (force_complete || dbobj->obj_state != NML_DBUS_OBJ_STATE_WITH_NMOBJ_NOT_READY) {
        if (force_complete)
            NML_NMCLIENT_LOG_D(self,
                               "%s() succeeded with %s but object is in an unsuitable state",
                               request_data->op_name,
                               dbobj->dbus_path->str);
        else
            NML_NMCLIENT_LOG_W(self,
                               "%s() succeeded with %s but object is in an unsuitable state",
                               request_data->op_name,
                               dbobj->dbus_path->str);

        g_task_return_error(
            request_data->task,
            g_error_new(NM_CLIENT_ERROR,
                        NM_CLIENT_ERROR_OBJECT_CREATION_FAILED,
                        _("request succeeded with %s but object is in an unsuitable state"),
                        dbobj->dbus_path->str));
        _request_wait_data_free(request_data);
        return TRUE;
    }

    return FALSE;
}

static void
_request_wait_complete_cb(NMClient *self, NMClientNotifyEventWithPtr *notify_event)
{
    _request_wait_complete(self, notify_event->user_data, TRUE);
}

static void
_request_wait_obj_watcher_cb(NMClient *self, gpointer obj_watcher_base)
{
    NMLDBusObjWatcherWithPtr *obj_watcher  = obj_watcher_base;
    RequestWaitData          *request_data = obj_watcher->user_data;
    NMLDBusObject            *dbobj;
2014-07-24 08:53:33 -04:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First, NMClient unpacks the message (e.g. _dbus_handle_properties_changed())
and keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet.
Finally, we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that, for example, during the
initial GetManagedObjects() reply, NMClient receives a large amount of
state at once, but we first apply all the changes to our GObject
instances before emitting any signals. The result is that signals are
always emitted at a moment when the cache is consistent. The unavoidable
downside is that when you receive a property-changed signal, possibly
many other properties have changed already and more signals are about to
be emitted. This batching is sketched after this list.
- NMDeviceWifi no longer modifies the content of the cache from the
client side during poke_wireless_devices_with_rf_status(). The content
of the cache should be determined by D-Bus alone and follow what the
NetworkManager service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should all be for the better,
but the goal is not to break any existing clients. This does change
internal (albeit externally visible) API, like dropping the
NM_OBJECT_DBUS_OBJECT_MANAGER property and NMObject no longer
implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Previously, we constructed
NMPropertiesInfo and had a large amount of code to propagate properties
from NMDBus* to NMObject. That got completely reworked, but did not
fundamentally change: you still need about the same effort to create the
NMLDBusMetaIface. Not using generated bindings did not make anything
worse (which says something about the usefulness of generated code, at
least in the way it was used).
- NMLDBusMetaIface and other metadata are static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty
hard to misuse them, because misuse won't compile. A sketch of the idea
follows after this list.
- The metadata now explicitly encodes the expected D-Bus types and makes
sure never to accept wrong data. That would only matter when the server
(accidentally or intentionally) exposes unexpected types on D-Bus. I
don't think that was previously ensured in all cases. For example,
demarshal_generic() only cared about the GObject property type; it
didn't know the expected D-Bus type.
- Previously, GDBusObjectManager would sometimes emit warnings (g_log()).
Those probably indicated real bugs. In any case, it prevented us from
running CI with G_DEBUG=fatal-warnings, because there would be just too
many unrelated crashes. Now we log debug messages that can be enabled
with "LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be
turned into g_warning()/g_critical() by setting
LIBNM_CLIENT_DEBUG=warning,error. Together with G_DEBUG=fatal-warnings,
this turns them into assertions. Note that such "assertion failures"
might also happen because of a server bug (or change). They are thus not
common assertions that indicate a bug in libnm, and they are not armed
unless explicitly requested. In our CI we should now always run with
LIBNM_CLIENT_DEBUG=warning,error and G_DEBUG=fatal-warnings to catch
bugs. Note that NetworkManager currently has bugs in this regard, so
enabling this will result in assertion failures; those should be fixed
first. A sketch of arming these knobs follows after this list.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to first emit
"device-removed", then "notify:devices", and finally "device-added"
signals. This changes behavior relative to commit 52ae28f6e5bf ('libnm:
queue added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order seems the most sensible to me.
- In NetworkManager, profiles can be made invisible to the user by
setting "connection.permissions". Such profiles are hidden from
NMClient's nm_client_get_connections() and from its
"connection-added"/"connection-removed" signals. Note that
NMActiveConnection's nm_active_connection_get_connection() and
NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved. A
listing sketch follows after this list.
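To illustrate the GMainContext rule above, here is a minimal sketch (not
part of this patch), assuming only the stated behavior that NMClient binds
to the thread-default main context at construction:
```
/* Sketch only: an NMClient bound to a private GMainContext. */
#include <NetworkManager.h>

static gpointer
worker_thread(gpointer user_data)
{
    GMainContext *ctx   = g_main_context_new();
    GMainLoop    *loop  = g_main_loop_new(ctx, FALSE);
    GError       *error = NULL;
    NMClient     *client;

    /* Make ctx the thread-default context; NMClient attaches its
     * sources here, never to g_main_context_default(). */
    g_main_context_push_thread_default(ctx);

    client = nm_client_new(NULL, &error);
    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_clear_error(&error);
    } else {
        /* All NMClient signals and callbacks fire while we iterate ctx. */
        g_main_loop_run(loop); /* quit via g_main_loop_quit() elsewhere */
        g_object_unref(client);
    }

    g_main_context_pop_thread_default(ctx);
    g_main_loop_unref(loop);
    g_main_context_unref(ctx);
    return NULL;
}
```
One would spawn this with, e.g., g_thread_new("nm-worker", worker_thread,
NULL); the point is that the creating thread alone iterates the context.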
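The two construction paths mentioned above look like this; this is plain
libnm API usage, not new code from the patch. Per the description, even
the blocking nm_client_new() is driven by the same asynchronous machinery
on an internal context:
```
#include <NetworkManager.h>

static void
client_ready_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError   *error  = NULL;
    NMClient *client = nm_client_new_finish(result, &error);

    if (!client) {
        g_printerr("async init failed: %s\n", error->message);
        g_clear_error(&error);
        return;
    }
    /* The cache now reflects the initial GetManagedObjects() reply. */
}

static void
create_clients(void)
{
    GError   *error = NULL;
    NMClient *client;

    /* Synchronous: blocks until ready, internally still asynchronous. */
    client = nm_client_new(NULL, &error);
    if (!client)
        g_clear_error(&error);

    /* Asynchronous: completes in the caller's thread-default context. */
    nm_client_new_async(NULL, client_ready_cb, NULL);
}
```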
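The three-step processing can be pictured with the following schematic.
All types and helpers here are invented for illustration; only the
_dbus_handle_*() names above refer to real functions:
```
#include <glib.h>

typedef struct {
    GPtrArray *changed; /* objects with recorded D-Bus changes */
    GPtrArray *signals; /* collected notifications, emitted last */
} ChangeBatch;

/* Phase 1: unpack the D-Bus message, only record what changed. */
static void
phase_unpack(ChangeBatch *batch, GVariant *message)
{
    /* ... parse message, append entries to batch->changed ... */
}

/* Phase 2: apply the recorded changes to the object instances,
 * collecting (but not yet emitting) notifications. */
static void
phase_apply(ChangeBatch *batch)
{
    /* ... update instances, append to batch->signals ... */
}

/* Phase 3: the cache is consistent now; emit everything collected. */
static void
phase_commit(ChangeBatch *batch)
{
    /* ... emit batch->signals strictly in order ... */
}

static void
handle_message(ChangeBatch *batch, GVariant *message)
{
    phase_unpack(batch, message);
    phase_apply(batch);
    phase_commit(batch);
}
```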
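The compile-time check can be illustrated as follows. The macro below is
an invented stand-in for NML_DBUS_META_PROPERTY_INIT_U(), showing one way
(C11 _Generic) to make a type mismatch fail at compile time:
```
#include <glib.h>
#include <stddef.h>

typedef struct {
    const char *dbus_type;
    gsize       offset;
} PropMeta;

/* Initializes metadata for a D-Bus "u" (guint32) property. If the
 * struct field is not a guint32, _Generic() has no matching branch
 * and the code fails to compile. */
#define PROP_META_INIT_U(ctype, field)                       \
    {                                                        \
        .dbus_type = "u",                                    \
        .offset    = offsetof(ctype, field)                  \
                     + 0 * _Generic(((ctype *) NULL)->field, \
                                    guint32 : 0)             \
    }

typedef struct {
    guint32 flags;
    char   *name;
} ExampleObject;

/* Compiles: */
static const PropMeta meta_ok = PROP_META_INIT_U(ExampleObject, flags);
/* Would not compile: PROP_META_INIT_U(ExampleObject, name) */
```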
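How a test might arm these knobs, sketched below. That libnm honors
LIBNM_CLIENT_DEBUG is stated above; that the variable must be set before
the first NMClient is created, and that g_log_set_always_fatal() is an
in-process stand-in for G_DEBUG=fatal-warnings, are assumptions of this
sketch:
```
#include <NetworkManager.h>

int
main(void)
{
    GError   *error = NULL;
    NMClient *client;

    /* Assumption: must be in place before libnm reads it, i.e. before
     * the first NMClient is created. */
    g_setenv("LIBNM_CLIENT_DEBUG", "warning,error", TRUE);

    /* Abort on any g_warning()/g_critical(), like G_DEBUG=fatal-warnings. */
    g_log_set_always_fatal(G_LOG_LEVEL_WARNING | G_LOG_LEVEL_CRITICAL);

    client = nm_client_new(NULL, &error);
    if (!client) {
        g_printerr("error: %s\n", error->message);
        g_clear_error(&error);
        return 1;
    }
    g_object_unref(client);
    return 0;
}
```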
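For the visibility rule above, a plain listing looks like this; these are
standard libnm calls, nothing specific to this patch. Profiles hidden via
"connection.permissions" simply never appear in the returned array:
```
#include <NetworkManager.h>

static void
dump_visible_connections(NMClient *client)
{
    /* Only profiles visible to the user appear here; hidden ones may
     * still surface via nm_active_connection_get_connection(). */
    const GPtrArray *connections = nm_client_get_connections(client);
    guint            i;

    for (i = 0; i < connections->len; i++) {
        NMRemoteConnection *remote = connections->pdata[i];

        g_print("%s (%s)\n",
                nm_connection_get_id(NM_CONNECTION(remote)),
                nm_connection_get_uuid(NM_CONNECTION(remote)));
    }
}
```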
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of the nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an
issue on Fedora 31 generating wrong ELF notes. Usually libnm is smaller,
but in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large
number of profiles (551 profiles and 515 devices, mostly unrealized).
This setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue; here we just check how
long a plain `nmcli` invocation takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
    dbobj = request_data->dbobj;

    /* Nothing to do while the object is not ready yet. */
    if (dbobj->obj_state == NML_DBUS_OBJ_STATE_WITH_NMOBJ_NOT_READY)
        return;

    /* Otherwise the object must be in one of the settled states. */
    nm_assert(NM_IN_SET((NMLDBusObjState) dbobj->obj_state,
                        NML_DBUS_OBJ_STATE_WATCHED_ONLY,
                        NML_DBUS_OBJ_STATE_ON_DBUS,
                        NML_DBUS_OBJ_STATE_WITH_NMOBJ_READY));

    /* Unregister and release the object watcher. */
    _dbobjs_obj_watcher_unregister(self, g_steal_pointer(&request_data->obj_watcher));
libnm: hide NMClient struct from public headers and use direct private field
Having the NMClient/NMClientClass structs in the public header allows
the user to subclass these types. Subclassing this type was never
intended, is not supported, and does not seem useful. Subclassing only
makes sense if the type has suitable hooks to extend it in a
meaningful way. NMClient hasn't, and anybody trying to derive from
this class would do better to delegate instead.
Also, having these structs in the public header prevents us from
embedding the private data in the object structure itself.
It thus has a runtime overhead and is less convenient for debugging (it's
hard to find the private data pointer in gdb).
Most importantly, there is no easy way to find the offset of the private
data fields, short of calling NM_CLIENT_GET_PRIVATE() -- which currently
is a macro. If we later want to generically look up the offset of the
private data, we would need NM_CLIENT_GET_PRIVATE() to be a function.
Instead, with an internal, statically known offset, we can use that
offset directly. A sketch of the pattern follows below.
Also drop all signal hooks. They are likewise not useful.
This is an ABI and API change, but of something that we never wanted to
be part of the ABI/API, and which hopefully nobody is using.
2019-10-18 07:44:31 +02:00
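A generic sketch of the pattern this commit applies, using invented names
(MyClient) rather than NMClient's actual code: keep the instance struct in
the .c file and embed the private data, so its offset is a compile-time
constant:
```
#include <glib-object.h>

/* In the public header, only an opaque typedef escapes: */
typedef struct _MyClient MyClient;

/* In the implementation file: */
typedef struct {
    char *version;
    /* ... */
} MyClientPrivate;

struct _MyClient {
    GObject         parent;
    /* Embedded, not a separate allocation: no G_ADD_PRIVATE() lookup,
     * and offsetof(struct _MyClient, _priv) is statically known. */
    MyClientPrivate _priv;
};
```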
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
nm_assert(NM_IN_SET((NMLDBusObjState) dbobj->obj_state,
|
|
|
|
|
NML_DBUS_OBJ_STATE_WATCHED_ONLY,
|
|
|
|
|
NML_DBUS_OBJ_STATE_ON_DBUS,
|
|
|
|
|
NML_DBUS_OBJ_STATE_WITH_NMOBJ_READY));
|
libnm: hide NMClient struct from public headers and use direct private field
Having the NMClient/NMClientClass structs in the public header allows
the user to subclass these types. Subclassing this type was never
intended, nor is it supported, nor does it seem useful. Subclassing only
makes sense if the type has suitable hooks to extend the type in a
meaningful way. NMClient hasn't, and everybody trying to derive from
this class would better delegate the actions.
Also, having these structs in the public header prevents us from
embedding the private data in the object structure itself.
It has thus an runtime overhead and is less convenient for debugging (it's
hard to find the private data pointer in gdb).
Most importantly, there is no easy way to find the offset of the private
data fields, short of calling NM_CLIENT_GET_PRIVATE() -- which currently
is a macro. Later we want to generically lookup the offset of the
private data, we would need NM_CLIENT_GET_PRIVATE() as a function.
Instead, by having an internally, statically known offset, we can use
that offset instead.
Also drop all signal hooks. They are also not useful.
This is an ABI and API change, but of something that we never wanted to
be part of the ABI/API, and which hopefull nobody is using.
2019-10-18 07:44:31 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check the RSS size in
    `ps aux`:
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    _dbobjs_obj_watcher_unregister(self, g_steal_pointer(&request_data->obj_watcher));
    nm_clear_g_signal_handler(g_task_get_cancellable(request_data->task),
                              &request_data->cancellable_id);
    _nm_client_notify_event_queue_with_ptr(self,
                                           NM_CLIENT_NOTIFY_EVENT_PRIO_AFTER + 30,
                                           _request_wait_complete_cb,
                                           request_data);
}
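The function that follows, _request_wait_cancelled_cb(), is the "cancelled"
handler of this request-wait machinery. For readers unfamiliar with the idiom,
below is a minimal, self-contained sketch of the general GCancellable-to-GTask
cancellation pattern. Everything named my_* (the MyRequestData struct, its
fields, and both functions) is invented for illustration; only the GLib/GIO
calls are real API, and this is not the actual libnm implementation.
```
/* Sketch of the GCancellable -> GTask cancellation pattern.
 * All my_* names are illustrative assumptions, not libnm code. */
#include <gio/gio.h>

typedef struct {
    GTask *task;           /* the pending async operation */
    gulong cancellable_id; /* "cancelled" signal handler id, 0 if none */
} MyRequestData;

static void
my_request_cancelled_cb(GCancellable *cancellable, gpointer user_data)
{
    MyRequestData *data = user_data;

    /* Disconnect first, so the handler cannot run a second time. */
    g_clear_signal_handler(&data->cancellable_id, cancellable);

    /* Complete the pending task with G_IO_ERROR_CANCELLED. */
    g_task_return_new_error(data->task, G_IO_ERROR, G_IO_ERROR_CANCELLED,
                            "Operation was cancelled");
    g_clear_object(&data->task);
    g_free(data);
}

static void
my_request_start(GObject *source, GCancellable *cancellable,
                 GAsyncReadyCallback callback, gpointer user_data)
{
    MyRequestData *data = g_new0(MyRequestData, 1);

    data->task = g_task_new(source, cancellable, callback, user_data);
    if (cancellable) {
        /* g_cancellable_connect() would additionally cover the
         * already-cancelled race; plain g_signal_connect() keeps the
         * sketch simple. Cleanup on the success path is elided here. */
        data->cancellable_id =
            g_signal_connect(cancellable, "cancelled",
                             G_CALLBACK(my_request_cancelled_cb), data);
    }
}
```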
static void
_request_wait_cancelled_cb(GCancellable *cancellable, gpointer user_data)
{
    /* Invoked when the caller cancels the pending request. */
    RequestWaitData *request_data = user_data;
    NMClient *self;
    GError *error = NULL;
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
nm_assert(cancellable == g_task_get_cancellable(request_data->task));
nm_utils_error_set_cancelled(&error, FALSE, NULL);
self = g_task_get_source_object(request_data->task);
nm_clear_g_signal_handler(cancellable, &request_data->cancellable_id);
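The four statements above are the source lines that survive from this
stretch of nm-client.c; they are fragments of a GTask cancellation
callback. A hedged reconstruction of the pattern they belong to follows.
The function name, the RequestData type, and the final
g_task_return_error() step are assumptions, not the verbatim source.
```
static void
_request_cancelled_cb(GCancellable *cancellable, gpointer user_data)
{
    RequestData *request_data = user_data; /* assumed container struct */
    GError      *error        = NULL;
    NMClient    *self;

    /* the signal must come from the task's own cancellable */
    nm_assert(cancellable == g_task_get_cancellable(request_data->task));

    /* prepare a cancelled error for the pending request */
    nm_utils_error_set_cancelled(&error, FALSE, NULL);

    /* fetch the NMClient that issued the task */
    self = g_task_get_source_object(request_data->task);
    (void) self; /* the real handler presumably uses it further */

    /* disconnect this handler and clear the stored handler id */
    nm_clear_g_signal_handler(cancellable, &request_data->cancellable_id);

    /* assumed completion step: fail the task with the cancelled error */
    g_task_return_error(request_data->task, g_steal_pointer(&error));
}
```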
|
2014-07-24 08:53:33 -04:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles are hidden by NMClient: they don't
appear in nm_client_get_connections() and emit no
"connection-added"/"connection-removed" signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved (see the
last sketch after this list).
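To make the compile-time check concrete, here is a minimal, hypothetical
sketch; ExamplePropertyMeta and EXAMPLE_PROPERTY_INIT_U() are invented names
and only approximate (by size, not exact type) the kind of check that
NML_DBUS_META_PROPERTY_INIT_U() is described to perform:
```
/* Hypothetical sketch, not the actual libnm macro. The negative array
 * size makes the initializer fail to compile when @field of @ctype does
 * not have the size of guint32, the C counterpart of D-Bus type "u". */
#include <glib.h>

typedef struct {
    const char *dbus_property_name;
    gsize       struct_offset;
} ExamplePropertyMeta;

#define EXAMPLE_PROPERTY_INIT_U(name, ctype, field)                     \
    {                                                                   \
        .dbus_property_name = name,                                     \
        .struct_offset      = G_STRUCT_OFFSET(ctype, field)             \
            + 0 * sizeof(char[(sizeof(((ctype *) NULL)->field)          \
                               == sizeof(guint32)) ? 1 : -1]),          \
    }

typedef struct {
    guint32 flags;
    guint64 bitrate;
} ExamplePrivate;

static const ExamplePropertyMeta example_meta[] = {
    EXAMPLE_PROPERTY_INIT_U("Flags", ExamplePrivate, flags),
    /* EXAMPLE_PROPERTY_INIT_U("Bitrate", ExamplePrivate, bitrate)
     * would not compile: the field is not 32 bit. */
};
```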
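The reordered signals can be observed with a small client. This is a sketch,
not part of the libnm sources; it assumes a running NetworkManager to connect
to and builds with `pkg-config --cflags --libs libnm`:
```
#include <NetworkManager.h>

static void
device_added_cb(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("device-added: %s\n", nm_device_get_iface(device));
}

static void
device_removed_cb(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("device-removed: %s\n", nm_device_get_iface(device));
}

static void
notify_devices_cb(GObject *object, GParamSpec *pspec, gpointer user_data)
{
    g_print("notify::devices\n");
}

int
main(void)
{
    GError    *error = NULL;
    NMClient  *client;
    GMainLoop *loop;

    client = nm_client_new(NULL, &error);
    if (!client) {
        g_printerr("could not create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    g_signal_connect(client, "device-added", G_CALLBACK(device_added_cb), NULL);
    g_signal_connect(client, "device-removed", G_CALLBACK(device_removed_cb), NULL);
    g_signal_connect(client, "notify::devices", G_CALLBACK(notify_devices_cb), NULL);

    /* NMClient emits its signals while this context is iterated. */
    loop = g_main_loop_new(NULL, FALSE);
    g_main_loop_run(loop);
    return 0;
}
```
Removing a device should print "device-removed" followed by "notify::devices";
adding one should print "notify::devices" followed by "device-added".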
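The asymmetry around invisible profiles can be shown with the following
sketch; it is not from the libnm sources and merely contrasts the visible
profile list with the profiles reachable through active connections:
```
#include <NetworkManager.h>

/* List the visible profiles, then the profiles behind the active
 * connections; the latter may include NMRemoteConnection instances
 * that the former hides due to "connection.permissions". */
static void
dump_profiles(NMClient *client)
{
    const GPtrArray *connections = nm_client_get_connections(client);
    const GPtrArray *active      = nm_client_get_active_connections(client);
    guint            i;

    for (i = 0; i < connections->len; i++) {
        NMConnection *remote = connections->pdata[i];

        g_print("visible profile: %s\n", nm_connection_get_id(remote));
    }

    for (i = 0; i < active->len; i++) {
        NMActiveConnection *ac     = active->pdata[i];
        NMRemoteConnection *remote = nm_active_connection_get_connection(ac);

        if (remote)
            g_print("active profile:  %s\n",
                    nm_connection_get_id(NM_CONNECTION(remote)));
    }
}

int
main(void)
{
    GError   *error  = NULL;
    NMClient *client = nm_client_new(NULL, &error);

    if (!client) {
        g_printerr("could not create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }
    dump_profiles(client);
    g_object_unref(client);
    return 0;
}
```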
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
_dbobjs_obj_watcher_unregister(self, g_steal_pointer(&request_data->obj_watcher));
|
2014-07-24 08:53:33 -04:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
g_task_return_error(request_data->task, error);
|
2019-10-04 13:56:36 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
_request_wait_data_free(request_data);
|
2019-10-04 13:56:36 +02:00
|
|
|
}
|
|
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
static void
|
2021-11-09 13:28:54 +01:00
|
|
|
_request_wait_start(GTask *task_take,
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Previously we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other metadata are static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure that the property types match. They are
hard to misuse, because a mismatch simply won't compile. See the macro
sketch after this list.
- The metadata now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That only matters when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type; it didn't know the expected D-Bus type. See the type-check sketch
after this list.
- Previously, GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Hence these are not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify:devices", and finally "device-added".
This changes behavior relative to commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and from the "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved. See the
sketch after this list.
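To illustrate the GMainContext contract above, here is a minimal sketch
(editor's illustration, not part of the patch; it uses only public
GLib/libnm API, and the callback name is made up):
```
#include <NetworkManager.h>

static void
client_ready_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GMainLoop *loop  = user_data;
    GError    *error = NULL;
    NMClient  *client;

    /* Invoked while iterating the context that was thread-default
     * when nm_client_new_async() was called. */
    client = nm_client_new_finish(result, &error);
    if (!client)
        g_error("failed to create NMClient: %s", error->message);
    g_object_unref(client);
    g_main_loop_quit(loop);
}

int
main(void)
{
    GMainContext *context = g_main_context_new();
    GMainLoop    *loop    = g_main_loop_new(context, FALSE);

    /* NMClient sticks to the thread-default main context at construction. */
    g_main_context_push_thread_default(context);
    nm_client_new_async(NULL, client_ready_cb, loop);

    /* The caller, not NMClient, iterates the context. */
    g_main_loop_run(loop);

    g_main_context_pop_thread_default(context);
    g_main_loop_unref(loop);
    g_main_context_unref(context);
    return 0;
}
```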
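The apply-first, notify-later discipline described in the processing item
is, in spirit, what GObject's freeze/thaw mechanism does for property
notifications. A generic sketch of that pattern (editor's illustration; the
property names are hypothetical, and NMClient's _dbus_handle_*() code
implements its own, more elaborate queue):
```
static void
apply_many_changes(GObject *obj)
{
    /* Queue "notify" emissions instead of emitting them immediately. */
    g_object_freeze_notify(obj);

    g_object_set(obj, "some-property", 1, NULL);      /* hypothetical */
    g_object_set(obj, "other-property", "foo", NULL); /* hypothetical */

    /* Only now, with the object in a consistent state, the queued
     * notifications are emitted. */
    g_object_thaw_notify(obj);
}
```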
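The compile-time checks mentioned for NML_DBUS_META_PROPERTY_INIT_U() can
be built from plain C. A sketch of the general technique (the macro and
struct below are the editor's, not the ones in libnm):
```
#include <glib.h>

typedef struct {
    guint32 metered; /* a field whose C type must match D-Bus "u" */
} ExampleData;

/* Evaluates to the offset of @field in @strct, but the conditional
 * expression compares &field against a guint32 pointer, so a field of
 * any other type is diagnosed at compile time. */
#define OFFSET_OF_GUINT32(strct, field) \
    (G_STRUCT_OFFSET(strct, field)      \
     + 0 * sizeof(1 ? &((strct *) NULL)->field : (guint32 *) NULL))

static const gsize metered_offset = OFFSET_OF_GUINT32(ExampleData, metered);
```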
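The explicit D-Bus type checks can be illustrated with plain GVariant API.
A sketch (editor's illustration; the function name is made up):
```
#include <glib.h>

/* Accept a property value only if the server sent the expected D-Bus
 * type ("u" here), instead of trusting the sender as the old
 * demarshal_generic() effectively did. */
static gboolean
demarshal_u_checked(GVariant *value, guint32 *out_val)
{
    if (!g_variant_is_of_type(value, G_VARIANT_TYPE_UINT32))
        return FALSE; /* unexpected type on D-Bus: reject it */

    *out_val = g_variant_get_uint32(value);
    return TRUE;
}
```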
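Finally, the profile-visibility behavior from the last item can be observed
through the public getters. A minimal sketch (editor's illustration; assumes
an already-initialized NMClient):
```
#include <NetworkManager.h>

/* Profiles made invisible via "connection.permissions" do not show up
 * here, although they may still be reachable through
 * nm_active_connection_get_connection(). */
static void
print_visible_connections(NMClient *client)
{
    const GPtrArray *connections = nm_client_get_connections(client);
    guint            i;

    for (i = 0; i < connections->len; i++) {
        NMRemoteConnection *remote = connections->pdata[i];

        g_print("%s\n", nm_connection_get_id(NM_CONNECTION(remote)));
    }
}
```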
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long a plain `nmcli` invocation takes on such a system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
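/* The lines below are a fragment of nm-client.c recovered from the blame
 * annotations: the tail of a request-wait helper's parameter list and the
 * beginning of its body. The function head, which takes the "GTask *task_take"
 * that is stolen below, lies outside this excerpt. */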
                        const char *op_name,
                        GType       gtype,
                        const char *dbus_path,
                        GVariant   *extra_results_take)
{
    NMClient              *self;
    gs_unref_object GTask *task = g_steal_pointer(&task_take);
    RequestWaitData       *request_data;
    GCancellable          *cancellable;
    NMLDBusObject         *dbobj;

    nm_assert(G_IS_TASK(task));

    self = g_task_get_source_object(task);
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
    dbobj = _dbobjs_get_nmobj(self, dbus_path, gtype);
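For context, a lookup like `_dbobjs_get_nmobj()` amounts to fetching a cached object by its D-Bus path and checking that it has the expected type. A minimal sketch under that assumption (the helper name `demo_cache_lookup` and the hash-table layout are hypothetical, not libnm's actual implementation):
```
#include <glib-object.h>

/* Hypothetical sketch: look up a cached object by D-Bus path and verify
 * that it has the expected GType. Not the actual libnm implementation. */
static gpointer
demo_cache_lookup(GHashTable *objs_by_path, const char *dbus_path, GType gtype)
{
    gpointer obj = g_hash_table_lookup(objs_by_path, dbus_path);

    if (obj && !G_TYPE_CHECK_INSTANCE_TYPE(obj, gtype))
        return NULL; /* cached, but not of the requested type */
    return obj;
}
```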
2019-10-30 11:42:58 +01:00
    if (!dbobj) {
        NML_NMCLIENT_LOG_E(self,
                           "%s() succeeded with %s but object does not exist",
                           op_name,
                           dbus_path);
        g_task_return_error(task,
                            g_error_new(NM_CLIENT_ERROR,
                                        NM_CLIENT_ERROR_FAILED,
                                        _("operation succeeded but object %s does not exist"),
                                        dbus_path));
        return;
    }
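The error path above follows the standard GTask completion pattern: the task is failed with g_task_return_error() and the function returns immediately, so the success path below never runs for a missing object. A self-contained sketch of that pattern (the demo_* names are hypothetical, not libnm API):
```
#include <gio/gio.h>

/* Hypothetical helper showing the GTask completion pattern: fail the
 * task when the object is missing, otherwise return it as the result. */
static void
demo_complete(GTask *task, GObject *found_object)
{
    if (!found_object) {
        g_task_return_error(task,
                            g_error_new(G_IO_ERROR,
                                        G_IO_ERROR_NOT_FOUND,
                                        "object does not exist"));
        return;
    }
    g_task_return_pointer(task, g_object_ref(found_object), g_object_unref);
}

/* Caller side: propagates either the result pointer or the error. */
static GObject *
demo_finish(GAsyncResult *result, GError **error)
{
    return g_task_propagate_pointer(G_TASK(result), error);
}
```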
2019-10-30 11:42:58 +01:00
    request_data = g_slice_new(RequestWaitData);
2024-10-04 10:45:56 +02:00
    *request_data = (RequestWaitData) {
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check `ps aux` for
    the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
        .task           = g_steal_pointer(&task),
        .op_name        = op_name,
        .gtype          = gtype,
        .dbobj          = nml_dbus_object_ref(dbobj),
        .obj_watcher    = NULL,
        .extra_results  = g_steal_pointer(&extra_results_take),
        .result         = NULL,
        .cancellable_id = 0,
    };
2020-09-28 16:03:33 +02:00
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
    if (_request_wait_complete(self, request_data, FALSE))
        return;
2020-09-28 16:03:33 +02:00
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
    NML_NMCLIENT_LOG_T(self,
                       "%s() succeeded with %s. Wait for object to become ready",
                       op_name,
                       dbobj->dbus_path->str);
2020-09-28 16:03:33 +02:00
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
    request_data->obj_watcher = _dbobjs_obj_watcher_register_o(self,
                                                               dbobj,
                                                               _request_wait_obj_watcher_cb,
                                                               sizeof(NMLDBusObjWatcherWithPtr));
    request_data->obj_watcher->user_data = request_data;
2020-09-28 16:03:33 +02:00
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
    the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    cancellable = g_task_get_cancellable(request_data->task);

    if (cancellable) {
        gulong id;
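        /* Connect a cancelled handler. If @cancellable is already cancelled,
         * g_cancellable_connect() invokes the callback synchronously and
         * returns 0, which the check below relies on. */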
        id = g_cancellable_connect(cancellable,
                                   G_CALLBACK(_request_wait_cancelled_cb),
                                   request_data,
                                   NULL);
        if (id == 0) {
            /* the callback was invoked synchronously, which destroyed @request_data.
             * We must not touch @info anymore. */
        } else
            request_data->cancellable_id = id;
    }
}

static gpointer
_request_wait_finish(NMClient *client,
                     GAsyncResult *result,
                     gpointer      source_tag,
                     GVariant    **out_result,
                     GError      **error)
{
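    /* Propagate the RequestWaitData pointer stored as the GTask's result.
     * On failure or cancellation, g_task_propagate_pointer() returns NULL
     * and sets @error. */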
    RequestWaitData *request_data = NULL;
    gpointer         r;

    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);
    g_return_val_if_fail(nm_g_task_is_valid(result, client, source_tag), NULL);

    request_data = g_task_propagate_pointer(G_TASK(result), error);
    if (!request_data) {
        NM_SET_OUT(out_result, NULL);
        return NULL;
    }
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and from the "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
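To make the collect-then-commit pattern above concrete, here is a minimal,
self-contained C sketch. Every name in it (Object, obj_set(), changes_commit())
is invented for illustration; the real logic lives in the _dbus_handle_*()
functions named above.
```
/* Minimal sketch (hypothetical names): apply all changes first, then emit
 * the queued notifications, so observers always see a consistent cache. */
#include <stdio.h>

#define MAX_PENDING 16

typedef struct {
    int         value_a;
    int         value_b;
    const char *pending[MAX_PENDING]; /* queued "notify" signal names */
    int         n_pending;
} Object;

/* Step 2: update the instance, but only queue the notification. */
static void
obj_set(Object *obj, int *field, int v, const char *prop_name)
{
    *field                         = v;
    obj->pending[obj->n_pending++] = prop_name;
}

/* Step 3: emit everything that was collected, after the state is final. */
static void
changes_commit(Object *obj)
{
    int i;

    for (i = 0; i < obj->n_pending; i++)
        printf("notify::%s (a=%d, b=%d)\n",
               obj->pending[i], obj->value_a, obj->value_b);
    obj->n_pending = 0;
}

int
main(void)
{
    Object obj = {0};

    /* Step 1 (unpacking the D-Bus message) is elided; assume it yielded
     * two property changes that belong together. */
    obj_set(&obj, &obj.value_a, 1, "value-a");
    obj_set(&obj, &obj.value_b, 2, "value-b");
    changes_commit(&obj);
    return 0;
}
```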
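The compile-time check can be pictured roughly as follows. This is a
hypothetical macro (MetaProperty and META_PROPERTY_INIT_U() are made up, not
the real NML_DBUS_META_PROPERTY_INIT_U() definition): it records the expected
D-Bus signature and uses C11 _Generic() so that applying the macro to a field
of the wrong C type refuses to compile.
```
/* Hypothetical sketch of a typed property-initializer macro. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
    const char *dbus_type; /* expected D-Bus signature, e.g. "u" */
    size_t      offset;    /* where the C field lives in the struct */
} MetaProperty;

/* The _Generic() selection only has a uint32_t branch, so using this
 * macro on a field of any other type is a compile-time error. */
#define META_PROPERTY_INIT_U(structname, field)                \
    {                                                          \
        .dbus_type = "u",                                      \
        .offset    = offsetof(structname, field)               \
                     + 0 * _Generic(((structname *) 0)->field, \
                                    uint32_t : 0),             \
    }

typedef struct {
    uint32_t flags;
    char    *name;
} ExampleObject;

static const MetaProperty example_props[] = {
    META_PROPERTY_INIT_U(ExampleObject, flags),
    /* META_PROPERTY_INIT_U(ExampleObject, name) would not compile,
     * because "name" is a char*, not a uint32_t. */
};

int
main(void)
{
    printf("property 0: type \"%s\" at offset %zu\n",
           example_props[0].dbus_type, example_props[0].offset);
    return 0;
}
```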
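As an example invocation, a strict test run might look like
`LIBNM_CLIENT_DEBUG=warning,error G_DEBUG=fatal-warnings ./test-client`
(the binary name is just a placeholder); with fatal warnings armed, any
such message aborts the process immediately.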
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    /* Tail of a request-wait helper: transfer the collected results to the
     * caller and return the resulting object. */
    nm_assert(NM_IS_OBJECT(request_data->result));

    NM_SET_OUT(out_result, g_steal_pointer(&request_data->extra_results));

    r = g_steal_pointer(&request_data->result);

    nm_assert(NM_IS_OBJECT(r));
    _request_wait_data_free(request_data);

    return r;
}
/*****************************************************************************/

/**
 * nm_client_get_instance_flags:
 * @self: the #NMClient instance.
 *
 * Returns: the #NMClientInstanceFlags flags.
 *
 * Since: 1.24
 */
NMClientInstanceFlags
nm_client_get_instance_flags(NMClient *self)
{
    g_return_val_if_fail(NM_IS_CLIENT(self), NM_CLIENT_INSTANCE_FLAGS_NONE);

    return NM_CLIENT_GET_PRIVATE(self)->instance_flags;
}
/**
 * nm_client_get_dbus_connection:
 * @client: a #NMClient
 *
 * Gets the %GDBusConnection of the instance. This can be either passed when
 * constructing the instance (as "dbus-connection" property), or it will be
 * automatically initialized during async/sync init.
 *
 * Returns: (transfer none): the D-Bus connection of the client, or %NULL if none is set.
 *
 * Since: 1.22
 **/
GDBusConnection *
nm_client_get_dbus_connection(NMClient *client)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);

    return NM_CLIENT_GET_PRIVATE(client)->dbus_connection;
}
/**
 * nm_client_get_dbus_name_owner:
 * @client: a #NMClient
 *
 * Returns: (transfer none): the current name owner of the D-Bus service of NetworkManager.
 *
 * Since: 1.22
 **/
const char *
nm_client_get_dbus_name_owner(NMClient *client)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);

    return NM_CLIENT_GET_PRIVATE(client)->name_owner;
}
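As an aside, a minimal usage sketch for the getters above might look like
this, assuming synchronous initialization via nm_client_new(); it is
illustrative only and not part of this file:
```
#include <NetworkManager.h>

int
main(void)
{
    GError     *error  = NULL;
    NMClient   *client = nm_client_new(NULL, &error);
    const char *owner;

    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    owner = nm_client_get_dbus_name_owner(client);

    g_print("instance flags: 0x%x\n", (guint) nm_client_get_instance_flags(client));
    g_print("name owner:     %s\n", owner ? owner : "(none)");
    g_print("have D-Bus:     %s\n", nm_client_get_dbus_connection(client) ? "yes" : "no");

    g_object_unref(client);
    return 0;
}
```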
/**
 * nm_client_get_version:
 * @client: a #NMClient
 *
 * Gets NetworkManager version.
 *
 * Returns: string with the version (or %NULL if NetworkManager is not running)
 **/
const char *
nm_client_get_version(NMClient *client)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with a large number of D-Bus
objects. The patch tries hard to make the implementation scale well. Of
course, when we cache N objects that have N-to-M references to each other,
we are still fundamentally O(N*M) in runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to an NMClient goes through the caller's
GMainContext. This follows glib's style and effectively allows using NMClient
in a multi-threaded scenario (see the sketch after this list). This implies
sticking to a main context upon construction and ensuring that callbacks are
only invoked when iterating that context. Also, NMClient itself shall never
iterate the caller's context. This also means libnm must never use
g_idle_add() or g_timeout_add(), as those enqueue sources in the
g_main_context_default() context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong: signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receives a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted at a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should all be for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Previously, we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other metadata is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them because it won't compile.
- The metadata now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). These are therefore not common assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
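(Editor's sketch, not part of the commit message: one minimal way a caller
could follow the GMainContext rule above, confining an NMClient to a worker
thread's own context. nm_client_new_async()/nm_client_new_finish() and the
GLib thread-default-context calls are real libnm/GLib API; the worker and
client_ready helpers are illustrative, and error handling is trimmed.)
```
#include <NetworkManager.h>

static void
client_ready(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GMainLoop *loop  = user_data;
    GError    *error = NULL;
    NMClient  *client;

    client = nm_client_new_finish(result, &error);
    if (!client) {
        g_printerr("could not create NMClient: %s\n", error->message);
        g_clear_error(&error);
    } else {
        /* Signals of this client are emitted only while iterating the
         * GMainContext that was thread-default at construction time. */
        g_object_unref(client);
    }
    g_main_loop_quit(loop);
}

static gpointer
worker(gpointer user_data)
{
    GMainContext *context = g_main_context_new();
    GMainLoop    *loop;

    /* Make this thread's own context the thread-default one before
     * constructing the client; that binds all its callbacks to it. */
    g_main_context_push_thread_default(context);
    loop = g_main_loop_new(context, FALSE);

    nm_client_new_async(NULL, client_ready, loop);
    g_main_loop_run(loop);

    g_main_loop_unref(loop);
    g_main_context_pop_thread_default(context);
    g_main_context_unref(context);
    return NULL;
}
```
Started with g_thread_new("nm-worker", worker, NULL), this keeps construction
and every later callback on one context, which is the usage contract the
bullet above describes.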
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check the RSS size
in `ps aux`.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
return NM_CLIENT_GET_PRIVATE(client)->nm.version;
|
2014-07-24 08:53:33 -04:00
|
|
|
}
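(Editor's sketch, not part of the annotated source: minimal synchronous usage
of nm_client_get_version(). nm_client_new() and the %NULL return for a stopped
daemon are from the API above; the rest is illustrative.)
```
#include <NetworkManager.h>

int
main(void)
{
    GError     *error  = NULL;
    NMClient   *client = nm_client_new(NULL, &error);
    const char *version;

    if (!client) {
        g_printerr("cannot create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    version = nm_client_get_version(client);
    g_print("NetworkManager version: %s\n",
            version ? version : "(daemon not running)");

    g_object_unref(client);
    return 0;
}
```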
|
|
|
|
|
|
|
|
|
|
/**
|
2014-09-18 16:11:00 -04:00
|
|
|
* nm_client_get_state:
|
2014-07-24 08:53:33 -04:00
|
|
|
* @client: a #NMClient
|
|
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Gets the current daemon state.
|
2014-07-24 08:53:33 -04:00
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Returns: the current %NMState
|
2014-07-24 08:53:33 -04:00
|
|
|
**/
|
2014-09-18 16:11:00 -04:00
|
|
|
NMState
|
|
|
|
|
nm_client_get_state(NMClient *client)
|
2014-07-24 08:53:33 -04:00
|
|
|
{
|
2014-09-18 16:11:00 -04:00
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), NM_STATE_UNKNOWN);
|
2014-07-24 08:53:33 -04:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
return NM_CLIENT_GET_PRIVATE(client)->nm.state;
|
2014-07-24 08:53:33 -04:00
|
|
|
}
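(Editor's sketch, not part of the annotated source: the daemon state returned
by nm_client_get_state() is also exposed as the "state" GObject property, so
changes can be watched with "notify::state". Assumes an initialized NMClient
whose GMainContext is being iterated.)
```
#include <NetworkManager.h>

static void
on_state_changed(GObject *object, GParamSpec *pspec, gpointer user_data)
{
    /* Re-read the property from the cache when it changes. */
    g_print("daemon state is now %d\n",
            (int) nm_client_get_state(NM_CLIENT(object)));
}

static void
watch_state(NMClient *client)
{
    g_signal_connect(client, "notify::state",
                     G_CALLBACK(on_state_changed), NULL);
}
```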
|
|
|
|
|
|
|
|
|
|
/**
|
2014-09-18 16:11:00 -04:00
|
|
|
* nm_client_get_startup:
|
2014-07-24 08:53:33 -04:00
|
|
|
* @client: a #NMClient
|
|
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Tests whether the daemon is still in the process of activating
|
|
|
|
|
* connections at startup.
|
2014-07-24 08:53:33 -04:00
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Returns: whether the daemon is still starting up
|
2014-07-24 08:53:33 -04:00
|
|
|
**/
|
2014-09-18 16:11:00 -04:00
|
|
|
gboolean
|
|
|
|
|
nm_client_get_startup(NMClient *client)
|
2014-07-24 08:53:33 -04:00
|
|
|
{
|
2016-10-18 16:35:07 +02:00
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
|
|
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
return NM_CLIENT_GET_PRIVATE(client)->nm.startup;
|
|
|
|
|
}
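(Editor's sketch, not part of the annotated source: a small helper around
nm_client_get_startup(); assumes an initialized NMClient.)
```
#include <NetworkManager.h>

static void
report_startup(NMClient *client)
{
    /* The startup flag stays TRUE while the daemon is still
     * activating connections after boot. */
    if (nm_client_get_startup(client))
        g_print("NetworkManager is still activating connections at startup\n");
    else
        g_print("startup complete\n");
}
```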
|
|
|
|
|
|
|
|
|
|
static void
|
2023-01-16 08:10:37 +01:00
|
|
|
_set_nm_running(NMClient *self, gboolean queue_notify)
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check the RSS column
    in `ps aux`.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
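The annotated nm-client.c source resumes below, mid-function. The fragment recomputes the cached nm-running state: the daemon counts as running once the NetworkManager D-Bus object (priv->dbobj_nm) is cached, its D-Bus name owner is known, and the initial GetManagedObjects() fetch has completed (the pending cancellable has been cleared).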
```
{
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
    gboolean         nm_running;

    nm_running = priv->dbobj_nm && priv->name_owner && !priv->get_managed_objects_cancellable;
    if (priv->nm_running != nm_running) {
        priv->nm_running = nm_running;
        if (queue_notify) {
            _nm_client_queue_notify_object(self, self, obj_properties[PROP_NM_RUNNING]);
        } else
            _notify(self, PROP_NM_RUNNING);
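        /* Note (inferred from this commit's design, not stated here): with
         * queue_notify set, the PROP_NM_RUNNING notification is queued and
         * emitted later in _dbus_handle_changes_commit() together with all
         * other collected changes, so listeners only ever observe a
         * consistent cache; otherwise the notification fires immediately. */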
    }
}

/**
 * nm_client_get_nm_running:
 * @client: a #NMClient
 *
 * Determines whether the daemon is running.
 *
 * Returns: %TRUE if the daemon is running
 **/
gboolean
nm_client_get_nm_running(NMClient *client)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
    return NM_CLIENT_GET_PRIVATE(client)->nm_running;
}

/**
 * nm_client_get_object_by_path:
 * @client: the #NMClient instance
 * @dbus_path: the D-Bus path of the object to look up
 *
 * Returns: (transfer none): the #NMObject instance that is
 * cached under @dbus_path, or %NULL if no such object exists.
 *
 * Since: 1.24
 */
NMObject *
nm_client_get_object_by_path(NMClient *client, const char *dbus_path)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);
    g_return_val_if_fail(dbus_path, NULL);

    return _dbobjs_get_nmobj_unpack_visible(client, dbus_path, G_TYPE_NONE);
}
```
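As a quick illustration of the two getters above, a minimal usage sketch (not part of the source; it assumes a synchronously created client, and the device path is a made-up example):
```
#include <NetworkManager.h>

int
main(void)
{
    GError   *error  = NULL;
    NMClient *client = nm_client_new(NULL, &error); /* sync init; NMClient uses an internal main context */
    NMObject *obj;

    if (!client) {
        g_printerr("could not create NMClient: %s\n", error->message);
        g_clear_error(&error);
        return 1;
    }

    g_print("NetworkManager running: %s\n", nm_client_get_nm_running(client) ? "yes" : "no");

    /* the path below is only an example; the lookup is (transfer none), so no
     * unref is needed, and it returns NULL when no such object is cached */
    obj = nm_client_get_object_by_path(client, "/org/freedesktop/NetworkManager/Devices/1");
    if (obj)
        g_print("found cached object of type %s\n", G_OBJECT_TYPE_NAME(obj));

    g_object_unref(client);
    return 0;
}
```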
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so the absolute values don't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
    rm -rf /tmp/nm-test && \
    git checkout -B test 4fad8c7c642e && \
    git clean -fdx && \
    ./autogen.sh --prefix=/tmp/nm-test && \
    make -j 5 install && \
    make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
    NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
    for i in {1..10}; do
        NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
    done
    echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long a plain `nmcli` invocation takes on the system.
```
do_cleanup() {
    for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
        nmcli connection delete uuid "$UUID"
    done
    for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
        nmcli device delete "$DEVICE"
    done
}
do_setup() {
    do_cleanup
    for i in {1..30}; do
        nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
        for j in $(seq $i 30); do
            nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
        done
    done
    systemctl restart NetworkManager.service
    sleep 5
}
do_test() {
    perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
/**
 * nm_client_get_metered:
 * @client: a #NMClient
 *
 * Returns: whether the default route is metered.
 *
 * Since: 1.22
 */
NMMetered
nm_client_get_metered(NMClient *client)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), NM_METERED_UNKNOWN);

    return NM_CLIENT_GET_PRIVATE(client)->nm.metered;
}
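/* A hedged usage sketch for the getter above (the helper name is
 * illustrative, not part of libnm): treat the "guess yes" value
 * conservatively as metered. */
static gboolean
example_default_route_is_metered(NMClient *client)
{
    switch (nm_client_get_metered(client)) {
    case NM_METERED_YES:
    case NM_METERED_GUESS_YES:
        return TRUE;
    case NM_METERED_NO:
    case NM_METERED_GUESS_NO:
    case NM_METERED_UNKNOWN:
    default:
        return FALSE;
    }
}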
/**
 * nm_client_networking_get_enabled:
 * @client: a #NMClient
 *
 * Whether networking is enabled or disabled.
 *
 * Returns: %TRUE if networking is enabled, %FALSE if networking is disabled
 **/
gboolean
nm_client_networking_get_enabled(NMClient *client)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
    return NM_CLIENT_GET_PRIVATE(client)->nm.networking_enabled;
}
/**
 * nm_client_networking_set_enabled:
 * @client: a #NMClient
 * @enabled: %TRUE to set networking enabled, %FALSE to set networking disabled
 * @error: return location for a #GError, or %NULL
 *
 * Enables or disables networking. When networking is disabled, all controlled
 * interfaces are disconnected and deactivated. When networking is enabled,
 * all controlled interfaces are available for activation.
 *
 * Returns: %TRUE on success, %FALSE otherwise
libnm: deprecate synchronous/blocking API in libnm
Note that D-Bus is fundamentally asynchronous. Doing blocking calls
on top of D-Bus is odd, especially for libnm's NMClient. That is because
NMClient essentially is a client-side cache of the objects from the D-Bus
interface. This cache should be filled exclusively by (asynchronous) D-Bus
events (PropertiesChanged). So, making a blocking D-Bus call means to wait
for a response and return it, while queuing all messages that are received
in the meantime.
Basically, there are three ways a synchronous API on NMClient could behave:
1) the call just calls g_dbus_connection_call_sync(). This means
that libnm sends a D-Bus request via GDBusConnection, and blockingly
waits for the response. All D-Bus messages that get received in the
meantime are queued in the GMainContext that belongs to NMClient.
That means, none of these D-Bus events are processed until we
iterate the GMainContext after the call returns. The effect is,
that NMClient (and all cached objects in there) are unaffected by
the D-Bus request.
Most of the synchronous API calls in libnm are of this kind.
The problem is that the strict ordering of D-Bus events gets
violated.
For some API this is not an immediate problem. Take for example
nm_device_wifi_request_scan(). The call merely blockingly tells
NetworkManager to start scanning, but since NetworkManager's D-Bus
API does not directly expose any state that tells whether we are
currently scanning, this out of order processing of the D-Bus
request is a small issue.
The problem is more obvious for nm_client_networking_set_enabled().
After calling it, NM_CLIENT_NETWORKING_ENABLED is still unchanged,
because the PropertiesChanged signal from D-Bus
is not yet processed (a sketch after this message illustrates this).
This means, while you make such a blocking call, NMClient's state
does not change. But usually you perform the synchronous call
to change some state. In this form, the blocking call is not useful,
because NMClient only changes the state after iterating the GMainContext,
and not after the blocking call returns.
2) like 1), but after making the blocking g_dbus_connection_call_sync(),
update the NMClient cache artificially. This is what
nm_manager_check_connectivity() does, to "fix" bgo#784629.
This also has the problem of out-of-order events, but it kinda
solves the problem of not changing the state during the blocking
call. But it does so by hacking the state of the cache. I think
this is really wrong because the state should only be updated from
the ordered stream of D-Bus messages (PropertiesChanged signal and
similar). When libnm decides to modify the state, there may be already
D-Bus messages queued that affect this very state.
3) instead of calling g_dbus_connection_call_sync(), use the
asynchronous g_dbus_connection_call(). If we would use a separate
GMainContext for all D-Bus related calls, we could ensure that
while we block for the response, we iterate that internal main context.
This might be nice, because all events are processed in order and
after the blocking call returns, the NMClient state is up to date.
There are problems, however: the current blocking API does not do this,
so it would be a significant change in behavior. Also, it might be
unexpected to the user that during the blocking call the entire
content of NMClient's cache might change and all pointers to the
cache might be invalidated. Also, of course NMClient would invoke
signals for all the changes that happen.
Another problem is that this would be more effort to implement
and it involves a small performance overhead for all D-Bus related
calls (because we have to serialize all events in an internal
GMainContext first and then invoke them on the caller's context).
Also, if users want this behavior, they can implement it themselves
by running libnm in their own GMainContext. Note that libnm might
have bugs that prevent that from really working, but those should be fixed
instead of adding such synchronous API behavior.
Read also [1], for why blocking calls are wrong.
[1] https://smcv.pseudorandom.co.uk/2008/11/nonblocking/
So, all possible behaviors for synchronous API have severe behavioural
issues. Mark all this API as deprecated. Also, this serves the purpose of
identifying blocking D-Bus calls in libnm.
Note that "deprecated" here does not really mean that the API is going
to be removed. We don't break API. The user may:
- continue to use this API. It's deprecated, awkward and discouraged,
but if it works, by all means use it.
- use asynchronous API. That's the only sensible way to use D-Bus.
If libnm lacks a certain asynchronous counterpart, it should be
added.
- use GDBusConnection directly. There really isn't anything wrong
with D-Bus or GDBusConnection. This deprecated API is just a wrapper
around g_dbus_connection_call_sync(). You may call it directly
without feeling dirty.
---
The only other remaining API is the synchronous GInitable call for
NMClient. That is an entirely separate beast and not particularly
wrong (from an API point of view).
Note that the synchronous API in NMSecretAgentOld, NMVpnPluginOld and
NMVpnServicePlugin is not deprecated here. These types are not part
of the D-Bus cache and while they have similar issues, it's less severe
because they have less state.
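A hedged sketch of behavior 1), illustrating the stale cache (the flow and
variable names are illustrative, not taken from libnm's examples):
```
GError *error = NULL;

/* Blocking call: the request is sent and the reply awaited, but the
 * PropertiesChanged signal stays queued in the GMainContext. */
nm_client_networking_set_enabled(client, FALSE, &error);

/* Still reports the old value: the cache was not updated yet. */
g_print("enabled: %d\n", nm_client_networking_get_enabled(client));

/* Dispatch queued events. Note that the daemon's PropertiesChanged
 * signal may still be in flight, so in general one has to keep
 * iterating the context until it arrives. */
while (g_main_context_iteration(NULL, FALSE))
    ;
g_print("enabled: %d\n", nm_client_networking_get_enabled(client));
```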
libnm: advise using D-Bus instead of deprecated synchronous methods
With 1.22, various synchronous functions for invoking D-Bus methods were
deprecated. The reason was that D-Bus is fundamentally asynchronous, and
providing synchronous API in NMClient is inherently wrong. That is
because NMClient essentially is a cache of the D-Bus API, and invoking
g_dbus_connection_call_sync() messes up the order of events from D-Bus.
In particular, when the synchronous function completes, the content of
the cache does not yet reflect the change.
Since they got deprecated, the question is what to replace them with.
Instead of adding, e.g., a nm_client_networking_set_enabled_async()
counterpart for nm_client_networking_set_enabled(), just expect the user
to call D-Bus directly.
D-Bus itself defines a reasonable API, and with GDBusConnection it
is fine (and convenient) to just call D-Bus operations directly.
Often libraries try to abstract D-Bus by providing convenience
wrappers around D-Bus API. I think that often is wrong and unnecessary.
Note that libnm's NMClient does a lot more than just wrapping simple
D-Bus calls. It provides a complete client-side cache of the D-Bus
interface. As such, what libnm's NMClient does is more than simple
wrappers around D-Bus. NMClient is a reasonable thing to do.
However, it is unnecessary to add API like nm_client_networking_set_enabled_async()
that only calls g_dbus_connection_call(). Don't pretend that we would need such
trivial wrappers in libnm.
Instead, recommend to use g_dbus_connection_call(). Or alternatively,
the convenience wrappers nm_client_dbus_call() and
nm_client_dbus_set_property().
 *
 * Deprecated: 1.22: Use the async command nm_client_dbus_call() on %NM_DBUS_PATH,
 * %NM_DBUS_INTERFACE to call "Enable" with "(b)" arguments and no return value.
 **/
gboolean
nm_client_networking_set_enabled(NMClient *client, gboolean enable, GError **error)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
    return _nm_client_dbus_call_sync_void(client,
                                          NULL,
                                          NM_DBUS_PATH,
                                          NM_DBUS_INTERFACE,
                                          "Enable",
                                          g_variant_new("(b)", enable),
                                          G_DBUS_CALL_FLAGS_NONE,
                                          NM_DBUS_DEFAULT_TIMEOUT_MSEC,
                                          TRUE,
                                          error);
}
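/* A hedged sketch (not part of this file) of the asynchronous replacement
 * recommended in the deprecation note above, using the public
 * nm_client_dbus_call(). The function names are illustrative. */
static void
enable_done(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError   *error = NULL;
    GVariant *ret;

    ret = nm_client_dbus_call_finish(NM_CLIENT(source), result, &error);
    if (!ret) {
        g_printerr("Enable failed: %s\n", error->message);
        g_error_free(error);
        return;
    }
    g_variant_unref(ret);
}

static void
example_networking_set_enabled_async(NMClient *client, gboolean enable)
{
    nm_client_dbus_call(client,
                        NM_DBUS_PATH,
                        NM_DBUS_INTERFACE,
                        "Enable",
                        g_variant_new("(b)", enable),
                        G_VARIANT_TYPE("()"),
                        -1, /* default timeout */
                        NULL,
                        enable_done,
                        NULL);
}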
/**
 * nm_client_wireless_get_enabled:
 * @client: a #NMClient
 *
 * Determines whether wireless is enabled.
 *
 * Returns: %TRUE if wireless is enabled
 **/
gboolean
nm_client_wireless_get_enabled(NMClient *client)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
return NM_CLIENT_GET_PRIVATE(client)->nm.wireless_enabled;
|
2014-07-24 08:53:33 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
2014-09-18 16:11:00 -04:00
|
|
|
* nm_client_wireless_set_enabled:
|
2014-07-24 08:53:33 -04:00
|
|
|
* @client: a #NMClient
|
2014-09-18 16:11:00 -04:00
|
|
|
* @enabled: %TRUE to enable wireless
|
2014-09-11 16:27:13 -04:00
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Enables or disables wireless devices.
|
libnm: deprecate synchronous/blocking API in libnm
Note that D-Bus is fundamentally asynchronous. Doing blocking calls
on top of D-Bus is odd, especially for libnm's NMClient. That is because
NMClient essentially is a client-side cache of the objects from the D-Bus
interface. This cache should be filled exclusively by (asynchronous) D-Bus
events (PropertiesChanged). So, making a blocking D-Bus call means to wait
for a response and return it, while queuing all messages that are received
in the meantime.
Basically, there are three ways a synchronous API on NMClient could behave:
1) the call just calls g_dbus_connection_call_sync(). This means
that libnm sends a D-Bus request via GDBusConnection, and blockingly
waits for the response. All D-Bus messages that get received in the
meantime are queued in the GMainContext that belongs to NMClient.
That means, none of these D-Bus events are processed until we
iterate the GMainContext after the call returns. The effect is,
that NMClient (and all cached objects in there) are unaffected by
the D-Bus request.
Most of the synchronous API calls in libnm are of this kind.
The problem is that the strict ordering of D-Bus events gets
violated.
For some API this is not an immediate problem. Take for example
nm_device_wifi_request_scan(). The call merely blockingly tells
NetworkManager to start scanning, but since NetworkManager's D-Bus
API does not directly expose any state that tells whether we are
currently scanning, this out-of-order processing of the D-Bus
request is a small issue.
The problem is more obvious for nm_client_networking_set_enabled().
After calling it, NM_CLIENT_NETWORKING_ENABLED is still unaffected
and unchanged, because the PropertiesChanged signal from D-Bus
is not yet processed.
This means, while you make such a blocking call, NMClient's state
does not change. But usually you perform the synchronous call
precisely to change some state. In this form, the blocking call is not
useful, because NMClient only changes the state after iterating the
GMainContext, and not when the blocking call returns (see the sketch
after this list).
2) like 1), but after making the blocking g_dbus_connection_call_sync(),
update the NMClient cache artificially. This is what
nm_manager_check_connectivity() does, to "fix" bgo#784629.
This also has the problem of out-of-order events, but it kinda
solves the problem of not changing the state during the blocking
call. But it does so by hacking the state of the cache. I think
this is really wrong because the state should only be updated from
the ordered stream of D-Bus messages (PropertiesChanged signal and
similar). When libnm decides to modify the state, there may already be
D-Bus messages queued that affect this very state.
3) instead of calling g_dbus_connection_call_sync(), use the
asynchronous g_dbus_connection_call(). If we used a separate
GMainContext for all D-Bus related calls, we could ensure that
while we block for the response, we iterate that internal main context.
This might be nice, because all events are processed in order and
after the blocking call returns, the NMClient state is up to date.
There are problems however: the current blocking API does not do this,
so it's a significant change in behavior. Also, it might be
unexpected to the user that during the blocking call the entire
content of NMClient's cache might change and all pointers to the
cache might be invalidated. Also, of course NMClient would invoke
signals for all the changes that happen.
Another problem is that this would be more effort to implement
and it involves a small performance overhead for all D-Bus related
calls (because we have to serialize all events in an internal
GMainContext first and then invoke them on the caller's context).
Also, if users want this behavior, they could implement it themselves
by running libnm in their own GMainContext. Note that libnm might
have bugs that prevent that from really working, but those should be fixed
instead of adding such synchronous API behavior.
Read also [1], for why blocking calls are wrong.
[1] https://smcv.pseudorandom.co.uk/2008/11/nonblocking/
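As a minimal illustration of behavior 1) (a sketch added here for clarity,
not part of the original commit message; it assumes an NMClient attached
to the default GMainContext):
```
#include <NetworkManager.h>

/* Sketch: the deprecated blocking setter returns before NMClient's
 * client-side cache reflects the change. */
static void
blocking_call_pitfall(NMClient *client)
{
    /* Sends the D-Bus request and blocks for the reply. PropertiesChanged
     * signals received in the meantime are queued in the GMainContext,
     * not processed. */
    nm_client_networking_set_enabled(client, FALSE, NULL);

    /* Typically still prints the old value: the cache is not updated yet. */
    g_print("enabled: %d\n", nm_client_networking_get_enabled(client));

    /* Only after iterating the GMainContext (and once the PropertiesChanged
     * signal has actually arrived) does the cached property change. */
    while (g_main_context_iteration(NULL, FALSE))
        ;
    g_print("enabled: %d\n", nm_client_networking_get_enabled(client));
}
```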
So, all possible behaviors for synchronous API have severe behavioural
issues. Mark all this API as deprecated. Also, this serves the purpose of
identifying blocking D-Bus calls in libnm.
Note that "deprecated" here does not really mean that the API is going
to be removed. We don't break API. The user may:
- continue to use this API. It's deprecated, awkward and discouraged,
but if it works, by all means use it.
- use asynchronous API. That's the only sensible way to use D-Bus.
If libnm lacks a certain asynchronous counterpart, it should be
added.
- use GDBusConnection directly. There really isn't anything wrong
with D-Bus or GDBusConnection. This deprecated API is just a wrapper
around g_dbus_connection_call_sync(). You may call it directly
without feeling dirty (see the sketch below).
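For example, a short sketch of the direct route (not from the commit
message; it assumes nm_client_get_dbus_connection() to obtain the
client's GDBusConnection):
```
/* Sketch: set "WirelessEnabled" via org.freedesktop.DBus.Properties.Set,
 * using GDBusConnection directly instead of the deprecated sync wrapper. */
static void
set_wireless_enabled_via_gdbus(NMClient *client, gboolean enabled)
{
    g_dbus_connection_call(nm_client_get_dbus_connection(client),
                           "org.freedesktop.NetworkManager",
                           NM_DBUS_PATH,
                           "org.freedesktop.DBus.Properties",
                           "Set",
                           g_variant_new("(ssv)",
                                         NM_DBUS_INTERFACE,
                                         "WirelessEnabled",
                                         g_variant_new_boolean(enabled)),
                           NULL, /* no reply type check */
                           G_DBUS_CALL_FLAGS_NONE,
                           -1,   /* default timeout */
                           NULL, /* GCancellable */
                           NULL, /* callback; fire-and-forget in this sketch */
                           NULL);
}
```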
---
The only other remaining API is the synchronous GInitable call for
NMClient. That is an entirely separate beast and not particularly
wrong (from an API point of view).
Note that synchronous API in NMSecretAgentOld, NMVpnPluginOld and
NMVpnServicePlugin is not deprecated here. These types are not part
of the D-Bus cache, and while they have similar issues, it's less severe
because they have less state.
2019-09-04 13:58:43 +02:00
|
|
|
*
|
libnm: advise using D-Bus instead of deprecated synchronous methods
With 1.22, various synchronous functions for invoking D-Bus methods were
deprecated. The reason was that D-Bus is fundamentally asynchronous, and
providing synchronous API in NMClient is inherently wrong. That is
because NMClient essentially is a cache of the D-Bus API, and invoking
g_dbus_connection_call_sync() messes up the order of events from D-Bus.
In particular, when the synchronous function completes, the content of
the cache does not yet reflect the change.
Since they got deprecated, the question is what to replace them with.
Instead of adding, for example, a nm_client_networking_set_enabled_async()
counterpart to nm_client_networking_set_enabled(), just expect the user
to call D-Bus directly.
D-Bus itself defines a reasonable API, and with GDBusConnection it
is fine (and convenient) to just call D-Bus operations directly.
Often libraries try to abstract D-Bus by providing convenience
wrappers around D-Bus API. I think that often is wrong and unnecessary.
Note that libnm's NMClient does a lot more than just wrapping simple
D-Bus calls. It provides a complete client-side cache of the D-Bus
interface. As such, what libnm's NMClient does goes beyond simple
wrappers around D-Bus, and NMClient is a reasonable abstraction.
However, it is unnecessary to add API like nm_client_networking_set_enabled_async()
that only calls g_dbus_connection_call(). Don't pretend that we need such
trivial wrappers in libnm.
Instead, recommend using g_dbus_connection_call(), or alternatively
the convenience wrappers nm_client_dbus_call() and
nm_client_dbus_set_property(), as sketched below.
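A corresponding sketch using the convenience wrapper (not part of the
original commit message; it assumes the nm_client_dbus_set_property()/
nm_client_dbus_set_property_finish() pair available since 1.24):
```
static void
set_wireless_done(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError *error = NULL;

    if (!nm_client_dbus_set_property_finish(NM_CLIENT(source), result, &error)) {
        g_printerr("setting WirelessEnabled failed: %s\n", error->message);
        g_error_free(error);
    }
}

static void
set_wireless_enabled_async(NMClient *client, gboolean enabled)
{
    nm_client_dbus_set_property(client,
                                NM_DBUS_PATH,
                                NM_DBUS_INTERFACE,
                                "WirelessEnabled",
                                g_variant_new_boolean(enabled),
                                -1,   /* default timeout */
                                NULL, /* GCancellable */
                                set_wireless_done,
                                NULL);
}
```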
2020-03-13 18:24:37 +01:00
|
|
|
* Deprecated: 1.22: Use the asynchronous nm_client_dbus_set_property() on %NM_DBUS_PATH,
|
|
|
|
|
* %NM_DBUS_INTERFACE to set the "WirelessEnabled" property to a "b" (boolean) value.
|
2019-09-04 13:58:43 +02:00
|
|
|
*/
|
2014-07-24 08:53:33 -04:00
|
|
|
void
|
2014-09-18 16:11:00 -04:00
|
|
|
nm_client_wireless_set_enabled(NMClient *client, gboolean enabled)
|
2014-07-24 08:53:33 -04:00
|
|
|
{
|
|
|
|
|
g_return_if_fail(NM_IS_CLIENT(client));
|
|
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
_nm_client_set_property_sync_legacy(client,
|
|
|
|
|
NM_DBUS_PATH,
|
|
|
|
|
NM_DBUS_INTERFACE,
|
|
|
|
|
"WirelessEnabled",
|
|
|
|
|
"b",
|
|
|
|
|
enabled);
|
2014-07-24 08:53:33 -04:00
|
|
|
}
|
|
|
|
|
|
2014-09-11 16:27:13 -04:00
|
|
|
/**
|
2014-09-18 16:11:00 -04:00
|
|
|
* nm_client_wireless_hardware_get_enabled:
|
|
|
|
|
* @client: a #NMClient
|
2014-09-11 16:27:13 -04:00
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Determines whether the wireless hardware is enabled.
|
2014-09-11 16:27:13 -04:00
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Returns: %TRUE if the wireless hardware is enabled
|
2014-09-11 16:27:13 -04:00
|
|
|
**/
|
2014-09-18 16:11:00 -04:00
|
|
|
gboolean
|
|
|
|
|
nm_client_wireless_hardware_get_enabled(NMClient *client)
|
2014-09-11 16:27:13 -04:00
|
|
|
{
|
2014-09-18 16:11:00 -04:00
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
|
2014-09-11 16:27:13 -04:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
return NM_CLIENT_GET_PRIVATE(client)->nm.wireless_hardware_enabled;
|
2014-09-11 16:27:13 -04:00
|
|
|
}
|
|
|
|
|
|
2022-03-21 10:20:11 +01:00
|
|
|
/**
|
|
|
|
|
* nm_client_get_radio_flags:
|
|
|
|
|
* @client: a #NMClient
|
|
|
|
|
*
|
|
|
|
|
* Get the radio flags, which indicate whether WLAN and WWAN radio hardware is available (see the sketch after this function).
|
|
|
|
|
*
|
|
|
|
|
* Returns: the #NMRadioFlags.
|
|
|
|
|
*
|
|
|
|
|
* Since: 1.38
|
|
|
|
|
**/
|
|
|
|
|
NMRadioFlags
|
|
|
|
|
nm_client_get_radio_flags(NMClient *client)
|
|
|
|
|
{
|
|
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), NM_RADIO_FLAG_NONE);
|
|
|
|
|
|
|
|
|
|
return NM_CLIENT_GET_PRIVATE(client)->nm.radio_flags;
|
|
|
|
|
}
|
|
|
|
|
|
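A brief usage sketch (added for illustration, not part of the original
source; it assumes the NM_RADIO_FLAG_WLAN_AVAILABLE and
NM_RADIO_FLAG_WWAN_AVAILABLE flags that accompany this API since 1.38):
```
/* Sketch: check which radio transmitters the daemon reports as present. */
static void
print_radio_availability(NMClient *client)
{
    NMRadioFlags flags = nm_client_get_radio_flags(client);

    g_print("WLAN radio present: %s\n",
            (flags & NM_RADIO_FLAG_WLAN_AVAILABLE) ? "yes" : "no");
    g_print("WWAN radio present: %s\n",
            (flags & NM_RADIO_FLAG_WWAN_AVAILABLE) ? "yes" : "no");
}
```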
2014-09-18 16:11:00 -04:00
|
|
|
/**
|
|
|
|
|
* nm_client_wwan_get_enabled:
|
|
|
|
|
* @client: a #NMClient
|
|
|
|
|
*
|
|
|
|
|
* Determines whether WWAN is enabled.
|
|
|
|
|
*
|
|
|
|
|
* Returns: %TRUE if WWAN is enabled
|
|
|
|
|
**/
|
|
|
|
|
gboolean
|
|
|
|
|
nm_client_wwan_get_enabled(NMClient *client)
|
2014-07-24 08:53:33 -04:00
|
|
|
{
|
2014-09-18 16:11:00 -04:00
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
|
2014-09-11 16:27:13 -04:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden by NMClient's
nm_client_get_connections() and by its "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
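To illustrate the compile-time and runtime type checks described in the
list above, here is a minimal sketch of the idea. All names in it
(DemoMetaProperty, DEMO_META_PROPERTY_INIT_B, demo_accept_property) are
hypothetical stand-ins, not the actual libnm-internal API:
```c
#include <glib-object.h>

/* Hypothetical stand-in for the meta data that describes one property. */
typedef struct {
    const char *dbus_type; /* expected D-Bus signature, e.g. "b" */
    guint       offset;    /* offset of the backing struct field */
} DemoMetaProperty;

/* Constant expression that is 0 if `field` has the size of gboolean;
 * otherwise the negative array size aborts the compilation. */
#define DEMO_ASSERT_BOOL_FIELD(structure, field) \
    (sizeof (char[sizeof (((structure *) NULL)->field) == sizeof (gboolean) ? 1 : -1]) - 1)

/* Initializer for a D-Bus "b" (boolean) property. Using it with a field
 * of the wrong size does not compile. */
#define DEMO_META_PROPERTY_INIT_B(structure, field)               \
    {                                                             \
        .dbus_type = "b",                                         \
        .offset    = G_STRUCT_OFFSET (structure, field)           \
                     + DEMO_ASSERT_BOOL_FIELD (structure, field), \
    }

typedef struct {
    gboolean wwan_enabled;
} DemoPrivate;

/* Static and immutable, as described above. */
static const DemoMetaProperty demo_props[] = {
    DEMO_META_PROPERTY_INIT_B (DemoPrivate, wwan_enabled),
};

/* Runtime counterpart: reject a D-Bus value whose type differs from the
 * declared one, instead of silently coercing it. */
static gboolean
demo_accept_property (const DemoMetaProperty *mprop, GVariant *value)
{
    return g_variant_is_of_type (value, G_VARIANT_TYPE (mprop->dbus_type));
}
```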
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
return NM_CLIENT_GET_PRIVATE(client)->nm.wwan_enabled;
|
2014-07-24 08:53:33 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
2014-09-18 16:11:00 -04:00
|
|
|
* nm_client_wwan_set_enabled:
|
2014-07-24 08:53:33 -04:00
|
|
|
* @client: a #NMClient
|
2014-09-18 16:11:00 -04:00
|
|
|
* @enabled: %TRUE to enable WWAN
|
2014-09-11 16:27:13 -04:00
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Enables or disables WWAN devices.
|
libnm: advise using D-Bus instead of deprecated synchronous methods
With 1.22, various synchronous functions for invoking D-Bus methods were
deprecated. The reason was that D-Bus is fundamentally asynchronous, and
providing synchronous API in NMClient is inherently wrong. That is
because NMClient essentially is a cache of the D-Bus API, and invoking
g_dbus_connection_call_sync() messes up the order of events from D-Bus.
In particular, when the synchronous function completes, the content of
the cache does not yet reflect the change.
Since they got deprecated, the question is what to replace them with.
Instead of adding, e.g., a nm_client_networking_set_enabled_async()
for nm_client_networking_set_enabled(), just expect the user to call
D-Bus directly.
D-Bus itself defines a reasonable API, and with GDBusConnection it
is fine (and convenient) to just call D-Bus operations directly.
Often libraries try to abstract D-Bus by providing convenience
wrappers around the D-Bus API. I think that is often wrong and unnecessary.
Note that libnm's NMClient does a lot more than wrap simple
D-Bus calls: it provides a complete client-side cache of the D-Bus
interface, and that is a reasonable thing to provide.
However, it is unnecessary to add API like nm_client_networking_set_enabled_async()
that only calls g_dbus_connection_call(). Don't pretend that we need such
trivial wrappers in libnm.
Instead, recommend using g_dbus_connection_call(), or alternatively
the convenience wrappers nm_client_dbus_call() and
nm_client_dbus_set_property().
2020-03-13 18:24:37 +01:00
|
|
|
*
|
|
|
|
|
* Deprecated: 1.22: Use the async command nm_client_dbus_set_property() on %NM_DBUS_PATH,
|
|
|
|
|
* %NM_DBUS_INTERFACE to set the "WwanEnabled" property to a "(b)" value.
|
2014-07-24 08:53:33 -04:00
|
|
|
**/
|
|
|
|
|
void
|
2014-09-18 16:11:00 -04:00
|
|
|
nm_client_wwan_set_enabled(NMClient *client, gboolean enabled)
|
2014-07-24 08:53:33 -04:00
|
|
|
{
|
|
|
|
|
g_return_if_fail(NM_IS_CLIENT(client));
|
2014-09-11 16:27:13 -04:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
|
|
|
_nm_client_set_property_sync_legacy(client,
|
|
|
|
|
NM_DBUS_PATH,
|
|
|
|
|
NM_DBUS_INTERFACE,
|
|
|
|
|
"WwanEnabled",
|
|
|
|
|
"b",
|
|
|
|
|
enabled);
|
2014-09-11 16:27:13 -04:00
|
|
|
}
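For reference, the asynchronous replacement that the deprecation note above
points to could look roughly like the following sketch. The helpers
wwan_set_done() and wwan_set_enabled_async() are illustrative names;
nm_client_dbus_set_property() and nm_client_dbus_set_property_finish() are
the libnm wrappers recommended above, used here assuming their documented
signatures:
```c
#include <NetworkManager.h>

static void
wwan_set_done (GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError *error = NULL;

    if (!nm_client_dbus_set_property_finish (NM_CLIENT (source), result, &error)) {
        g_warning ("setting WwanEnabled failed: %s", error->message);
        g_error_free (error);
    }
    /* Note: the client-side cache may not reflect the change yet; wait
     * for the corresponding property-changed notification instead. */
}

static void
wwan_set_enabled_async (NMClient *client, gboolean enabled)
{
    nm_client_dbus_set_property (client,
                                 NM_DBUS_PATH,
                                 NM_DBUS_INTERFACE,
                                 "WwanEnabled",
                                 g_variant_new_boolean (enabled),
                                 -1,   /* default D-Bus timeout */
                                 NULL, /* no GCancellable */
                                 wwan_set_done,
                                 NULL);
}
```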
|
2014-07-24 08:53:33 -04:00
|
|
|
|
2014-09-11 16:27:13 -04:00
|
|
|
/**
|
2014-09-18 16:11:00 -04:00
|
|
|
* nm_client_wwan_hardware_get_enabled:
|
|
|
|
|
* @client: a #NMClient
|
2014-09-11 16:27:13 -04:00
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Determines whether the WWAN hardware is enabled.
|
2014-09-11 16:27:13 -04:00
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Returns: %TRUE if the WWAN hardware is enabled
|
2014-09-11 16:27:13 -04:00
|
|
|
**/
|
2014-09-18 16:11:00 -04:00
|
|
|
gboolean
|
|
|
|
|
nm_client_wwan_hardware_get_enabled(NMClient *client)
|
2014-07-24 08:53:33 -04:00
|
|
|
{
|
2014-09-18 16:11:00 -04:00
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
|
2014-07-24 08:53:33 -04:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
|
|
|
return NM_CLIENT_GET_PRIVATE(client)->nm.wwan_hardware_enabled;
|
2014-07-24 08:53:33 -04:00
|
|
|
}
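Getters like the one above only read the client-side cache. To react to
changes, a caller can connect to the corresponding GObject notify signal; a
minimal sketch, assuming the NM_CLIENT_WWAN_HARDWARE_ENABLED property-name
define from nm-client.h:
```c
static void
on_wwan_hw_changed (GObject *object, GParamSpec *pspec, gpointer user_data)
{
    g_print ("WWAN hardware switch is now %s\n",
             nm_client_wwan_hardware_get_enabled (NM_CLIENT (object)) ? "on" : "off");
}

static void
watch_wwan_hardware (NMClient *client)
{
    /* The signal is emitted from the GMainContext the client was created in. */
    g_signal_connect (client,
                      "notify::" NM_CLIENT_WWAN_HARDWARE_ENABLED,
                      G_CALLBACK (on_wwan_hw_changed),
                      NULL);
}
```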
|
|
|
|
|
|
|
|
|
|
/**
|
2014-09-18 16:11:00 -04:00
|
|
|
* nm_client_wimax_get_enabled:
|
2014-07-24 08:53:33 -04:00
|
|
|
* @client: a #NMClient
|
|
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Determines whether WiMAX is enabled.
|
2014-09-11 16:27:13 -04:00
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Returns: %TRUE if WiMAX is enabled
|
2019-10-29 20:05:02 +01:00
|
|
|
*
|
2020-03-13 19:45:09 +01:00
|
|
|
* Deprecated: 1.22: This function always returns FALSE because WiMAX is no longer supported.
|
2014-07-24 08:53:33 -04:00
|
|
|
**/
|
2014-09-11 16:27:13 -04:00
|
|
|
gboolean
|
2014-09-18 16:11:00 -04:00
|
|
|
nm_client_wimax_get_enabled(NMClient *client)
|
2014-07-24 08:53:33 -04:00
|
|
|
{
|
2014-09-11 16:27:13 -04:00
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
|
2014-09-11 16:27:13 -04:00
|
|
|
|
2019-10-29 20:05:02 +01:00
|
|
|
return FALSE;
|
2014-09-11 16:27:13 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
2014-09-18 16:11:00 -04:00
|
|
|
* nm_client_wimax_set_enabled:
|
2014-09-11 16:27:13 -04:00
|
|
|
* @client: a #NMClient
|
2014-09-18 16:11:00 -04:00
|
|
|
* @enabled: %TRUE to enable WiMAX
|
2014-09-11 16:27:13 -04:00
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Enables or disables WiMAX devices.
|
2019-10-29 20:05:02 +01:00
|
|
|
*
|
2020-03-13 19:45:09 +01:00
|
|
|
* Deprecated: 1.22: This function does nothing because WiMAX is no longer supported.
|
2014-09-11 16:27:13 -04:00
|
|
|
**/
|
|
|
|
|
void
|
2014-09-18 16:11:00 -04:00
|
|
|
nm_client_wimax_set_enabled(NMClient *client, gboolean enabled)
|
2014-09-11 16:27:13 -04:00
|
|
|
{
|
|
|
|
|
g_return_if_fail(NM_IS_CLIENT(client));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
2014-09-18 16:11:00 -04:00
|
|
|
* nm_client_wimax_hardware_get_enabled:
|
2014-09-11 16:27:13 -04:00
|
|
|
* @client: a #NMClient
|
|
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Determines whether the WiMAX hardware is enabled.
|
2014-09-11 16:27:13 -04:00
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Returns: %TRUE if the WiMAX hardware is enabled
|
2019-10-29 20:05:02 +01:00
|
|
|
*
|
2020-03-13 19:45:09 +01:00
|
|
|
* Deprecated: 1.22: This function always returns FALSE because WiMAX is no longer supported.
|
2014-09-11 16:27:13 -04:00
|
|
|
**/
|
|
|
|
|
gboolean
|
2014-09-18 16:11:00 -04:00
|
|
|
nm_client_wimax_hardware_get_enabled(NMClient *client)
|
2014-09-11 16:27:13 -04:00
|
|
|
{
|
2014-09-18 16:11:00 -04:00
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
|
2014-09-11 16:27:13 -04:00
|
|
|
|
2019-10-29 20:05:02 +01:00
|
|
|
return FALSE;
|
2014-09-11 16:27:13 -04:00
|
|
|
}
|
|
|
|
|
|
2017-08-09 15:21:28 +08:00
|
|
|
/**
|
|
|
|
|
* nm_client_connectivity_check_get_available:
|
|
|
|
|
* @client: a #NMClient
|
|
|
|
|
*
|
|
|
|
|
* Determine whether connectivity checking is available. This
|
|
|
|
|
* requires that the URI of a connectivity service has been set in the
|
|
|
|
|
* configuration file.
|
|
|
|
|
*
|
|
|
|
|
* Returns: %TRUE if connectivity checking is available.
|
2017-08-17 23:08:44 +02:00
|
|
|
*
|
|
|
|
|
* Since: 1.10
|
2017-08-09 15:21:28 +08:00
|
|
|
*/
|
|
|
|
|
gboolean
|
|
|
|
|
nm_client_connectivity_check_get_available(NMClient *client)
|
|
|
|
|
{
|
|
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
|
|
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check the RSS size
    in the `ps aux` output.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
return NM_CLIENT_GET_PRIVATE(client)->nm.connectivity_check_available;
|
2017-08-09 15:21:28 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* nm_client_connectivity_check_get_enabled:
|
|
|
|
|
* @client: a #NMClient
|
|
|
|
|
*
|
|
|
|
|
* Determine whether connectivity checking is enabled.
|
|
|
|
|
*
|
|
|
|
|
* Returns: %TRUE if connectivity checking is enabled.
|
2017-08-17 23:08:44 +02:00
|
|
|
*
|
|
|
|
|
* Since: 1.10
|
2017-08-09 15:21:28 +08:00
|
|
|
*/
|
|
|
|
|
gboolean
|
|
|
|
|
nm_client_connectivity_check_get_enabled(NMClient *client)
|
|
|
|
|
{
|
|
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
|
|
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
return NM_CLIENT_GET_PRIVATE(client)->nm.connectivity_check_enabled;
|
2017-08-09 15:21:28 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
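For illustration, a minimal usage sketch for the cached connectivity-check
getters in this file (the helper name is hypothetical and error handling is
abbreviated; nm_client_new() initializes the client synchronously):
```
#include <NetworkManager.h>

/* Hypothetical sketch: read the connectivity-check flags from
 * NMClient's local cache. */
static void
print_connectivity_check_state(void)
{
    GError   *error  = NULL;
    NMClient *client = nm_client_new(NULL, &error);

    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_error_free(error);
        return;
    }
    g_print("connectivity checking available=%d enabled=%d\n",
            nm_client_connectivity_check_get_available(client),
            nm_client_connectivity_check_get_enabled(client));
    g_object_unref(client);
}
```
Both getters only read NMClient's local property cache, so they are cheap
and never block on D-Bus.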
/**
|
|
|
|
|
* nm_client_connectivity_check_set_enabled:
|
|
|
|
|
* @client: a #NMClient
|
|
|
|
|
* @enabled: %TRUE to enable connectivity checking
|
|
|
|
|
*
|
|
|
|
|
* Enable or disable connectivity checking. Note that if a
|
|
|
|
|
* connectivity checking URI has not been configured, this will not
|
|
|
|
|
* have any effect.
|
2017-08-17 23:08:44 +02:00
|
|
|
*
|
|
|
|
|
* Since: 1.10
|
libnm: advise using D-Bus instead of deprecated synchronous methods
With 1.22, various synchronous functions for invoking D-Bus methods were
deprecated. The reason was that D-Bus is fundamentally asynchronous, and
providing synchronous API in NMClient is inherently wrong. That is
because NMClient essentially is a cache of the D-Bus API, and invoking
g_dbus_connection_call_sync() messes up the order of events from D-Bus.
In particular, when the synchronous function completes, the content of
the cache does not yet reflect the change.
Since they got deprecated, the question is what to replace them with.
Instead of adding, for example, an nm_client_networking_set_enabled_async()
counterpart to nm_client_networking_set_enabled(), we just expect the user
to call D-Bus directly.
D-Bus itself defines a reasonable API, and with GDBusConnection it
is fine (and convenient) to just call D-Bus operations directly.
Libraries often try to abstract D-Bus behind convenience wrappers
around the D-Bus API. I think that is often wrong and unnecessary.
Note that libnm's NMClient does a lot more than wrap simple
D-Bus calls: it provides a complete client-side cache of the D-Bus
interface. As such, what libnm's NMClient does goes beyond simple
wrappers around D-Bus, and NMClient remains a reasonable thing to have.
However, it is unnecessary to add API like nm_client_networking_set_enabled_async()
that only calls g_dbus_connection_call(). Don't pretend that we need such
trivial wrappers in libnm.
Instead, the recommendation is to use g_dbus_connection_call(), or
alternatively the convenience wrappers nm_client_dbus_call() and
nm_client_dbus_set_property().
2020-03-13 18:24:37 +01:00
|
|
|
*
|
|
|
|
|
* Deprecated: 1.22: Use the async command nm_client_dbus_set_property() on %NM_DBUS_PATH,
|
|
|
|
|
* %NM_DBUS_INTERFACE to set "ConnectivityCheckEnabled" property to a "(b)" value.
|
2017-08-09 15:21:28 +08:00
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
nm_client_connectivity_check_set_enabled(NMClient *client, gboolean enabled)
|
|
|
|
|
{
|
|
|
|
|
g_return_if_fail(NM_IS_CLIENT(client));
|
|
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
_nm_client_set_property_sync_legacy(client,
|
|
|
|
|
NM_DBUS_PATH,
|
|
|
|
|
NM_DBUS_INTERFACE,
|
|
|
|
|
"ConnectivityCheckEnabled",
|
|
|
|
|
"b",
|
|
|
|
|
enabled);
|
2017-08-09 15:21:28 +08:00
|
|
|
}
|
|
|
|
|
|
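In line with the deprecation note above, a minimal sketch of the suggested
asynchronous replacement. This assumes the nm_client_dbus_set_property()
and nm_client_dbus_set_property_finish() helpers referenced in that note;
the callback name is illustrative:
```
/* Hypothetical sketch: enable connectivity checking via the
 * asynchronous property helper instead of the deprecated
 * synchronous setter. */
static void
set_enabled_done(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError *error = NULL;

    if (!nm_client_dbus_set_property_finish(NM_CLIENT(source), result, &error)) {
        g_printerr("setting ConnectivityCheckEnabled failed: %s\n",
                   error->message);
        g_error_free(error);
    }
}

static void
enable_connectivity_check(NMClient *client)
{
    nm_client_dbus_set_property(client,
                                NM_DBUS_PATH,
                                NM_DBUS_INTERFACE,
                                "ConnectivityCheckEnabled",
                                g_variant_new_boolean(TRUE),
                                -1,   /* default timeout */
                                NULL, /* cancellable */
                                set_enabled_done,
                                NULL);
}
```
Equivalently, g_dbus_connection_call() could invoke
org.freedesktop.DBus.Properties.Set directly on the connection returned by
nm_client_get_dbus_connection().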
2019-07-22 15:55:15 +01:00
|
|
|
/**
|
|
|
|
|
* nm_client_connectivity_check_get_uri:
|
|
|
|
|
* @client: a #NMClient
|
|
|
|
|
*
|
|
|
|
|
* Get the URI that will be queried to determine if there is internet
|
|
|
|
|
* connectivity.
|
|
|
|
|
*
|
|
|
|
|
* Returns: (transfer none): the connectivity URI in use
|
|
|
|
|
*
|
|
|
|
|
* Since: 1.20
|
|
|
|
|
*/
|
|
|
|
|
const char *
|
|
|
|
|
nm_client_connectivity_check_get_uri(NMClient *client)
|
|
|
|
|
{
|
|
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), NULL);
|
|
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
return NM_CLIENT_GET_PRIVATE(client)->nm.connectivity_check_uri;
|
2019-07-22 15:55:15 +01:00
|
|
|
}
|
|
|
|
|
|
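For contrast with the cached getter above, the same value can also be read
straight from D-Bus, in the spirit of the deprecation notes in this file.
A sketch assuming the standard org.freedesktop.DBus.Properties interface
and the "ConnectivityCheckUri" property name (callback and helper names are
illustrative):
```
/* Hypothetical sketch: fetch ConnectivityCheckUri directly via
 * org.freedesktop.DBus.Properties.Get, bypassing the NMClient cache. */
static void
get_uri_done(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError   *error = NULL;
    GVariant *ret;
    GVariant *value;

    ret = g_dbus_connection_call_finish(G_DBUS_CONNECTION(source), result, &error);
    if (!ret) {
        g_printerr("Get failed: %s\n", error->message);
        g_error_free(error);
        return;
    }
    g_variant_get(ret, "(v)", &value);
    g_print("connectivity URI: %s\n", g_variant_get_string(value, NULL));
    g_variant_unref(value);
    g_variant_unref(ret);
}

static void
query_connectivity_uri(NMClient *client)
{
    g_dbus_connection_call(nm_client_get_dbus_connection(client),
                           "org.freedesktop.NetworkManager",
                           NM_DBUS_PATH,
                           "org.freedesktop.DBus.Properties",
                           "Get",
                           g_variant_new("(ss)", NM_DBUS_INTERFACE, "ConnectivityCheckUri"),
                           G_VARIANT_TYPE("(v)"),
                           G_DBUS_CALL_FLAGS_NONE,
                           -1,
                           NULL,
                           get_uri_done,
                           NULL);
}
```
Note that, unlike nm_client_connectivity_check_get_uri(), this round-trips
to the daemon instead of reading the local cache.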
2014-07-24 08:53:33 -04:00
|
|
|
/**
|
2014-09-18 16:11:00 -04:00
|
|
|
* nm_client_get_logging:
|
2014-07-24 08:53:33 -04:00
|
|
|
* @client: a #NMClient
|
2023-03-02 12:18:30 +01:00
|
|
|
* @level: (out) (optional) (nullable): return location for logging level string
|
|
|
|
|
* @domains: (out) (optional) (nullable): return location for log domains string. The string is
|
2014-09-18 16:11:00 -04:00
|
|
|
* a list of domains separated by ","
|
2023-03-01 01:21:38 +01:00
|
|
|
* @error: return location for a #GError, or %NULL
|
2014-07-24 08:53:33 -04:00
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Gets NetworkManager's current logging level and domains.
|
2014-07-24 08:53:33 -04:00
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Returns: %TRUE on success, %FALSE otherwise
|
2019-10-04 16:48:39 +02:00
|
|
|
*
|
2020-03-13 18:24:37 +01:00
|
|
|
* Deprecated: 1.22: Use the async command nm_client_dbus_call() on %NM_DBUS_PATH,
|
|
|
|
|
* %NM_DBUS_INTERFACE to call "GetLogging" with no arguments to get "(ss)" for level
|
|
|
|
|
* and domains.
|
2014-07-24 08:53:33 -04:00
|
|
|
**/
|
2014-09-18 16:11:00 -04:00
|
|
|
gboolean
|
2019-10-04 16:18:51 +02:00
|
|
|
nm_client_get_logging(NMClient *client, char **level, char **domains, GError **error)
|
2014-07-24 08:53:33 -04:00
|
|
|
{
|
2019-10-04 16:18:51 +02:00
|
|
|
gs_unref_variant GVariant *ret = NULL;
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2014-09-18 16:11:00 -04:00
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
|
|
|
|
|
g_return_val_if_fail(level == NULL || *level == NULL, FALSE);
|
|
|
|
|
g_return_val_if_fail(domains == NULL || *domains == NULL, FALSE);
|
|
|
|
|
g_return_val_if_fail(error == NULL || *error == NULL, FALSE);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
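Following the deprecation advice in the comment above, a sketch of the
asynchronous replacement via nm_client_dbus_call(), assuming the "(ss)"
reply format stated there (helper names are illustrative):
```
/* Hypothetical sketch: query the logging level and domains with the
 * asynchronous D-Bus helper instead of the deprecated synchronous call. */
static void
get_logging_done(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError     *error = NULL;
    GVariant   *ret;
    const char *level;
    const char *domains;

    ret = nm_client_dbus_call_finish(NM_CLIENT(source), result, &error);
    if (!ret) {
        g_printerr("GetLogging failed: %s\n", error->message);
        g_error_free(error);
        return;
    }
    g_variant_get(ret, "(&s&s)", &level, &domains);
    g_print("level=%s domains=%s\n", level, domains);
    g_variant_unref(ret);
}

static void
query_logging(NMClient *client)
{
    nm_client_dbus_call(client,
                        NM_DBUS_PATH,
                        NM_DBUS_INTERFACE,
                        "GetLogging",
                        NULL, /* no arguments */
                        G_VARIANT_TYPE("(ss)"),
                        -1,
                        NULL,
                        get_logging_done,
                        NULL);
}
```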
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receives a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should all be for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also, before, we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data are static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them, because misuse won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. (Anyway, the point
is to show the relative sizes, so it doesn't matter.)
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
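The GMainContext contract described in the message above can be made concrete
with a small sketch. This is an illustration, not part of the original sources:
it only assumes the public entry points nm_client_new() and the usual GLib
main-context API, and shows one NMClient owned and iterated by a single worker
thread.
```c
#include <NetworkManager.h>

/* Worker thread: create the NMClient while a private GMainContext is the
 * thread-default context, then iterate only that context ourselves.
 * NMClient binds to the thread-default context at construction time and
 * dispatches all its signals and callbacks from there. */
static gpointer
worker_thread(gpointer user_data)
{
    GMainContext *context = g_main_context_new();
    NMClient     *client;
    GError       *error = NULL;

    g_main_context_push_thread_default(context);

    client = nm_client_new(NULL, &error); /* synchronous init, for brevity */
    if (!client) {
        g_warning("could not create NMClient: %s", error->message);
        g_clear_error(&error);
    } else {
        GMainLoop *loop = g_main_loop_new(context, FALSE);

        /* All notify::* and added/removed signals of @client fire while
         * this loop runs, i.e. on this thread only. Runs until some other
         * part of the program calls g_main_loop_quit(). */
        g_main_loop_run(loop);
        g_main_loop_unref(loop);
        g_object_unref(client);
    }

    g_main_context_pop_thread_default(context);
    g_main_context_unref(context);
    return NULL;
}
```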
2019-10-30 11:42:58 +01:00
|
|
|
ret = _nm_client_dbus_call_sync(client,
|
2019-10-04 16:18:51 +02:00
|
|
|
NULL,
|
|
|
|
|
NM_DBUS_PATH,
|
|
|
|
|
NM_DBUS_INTERFACE,
|
|
|
|
|
"GetLogging",
|
|
|
|
|
g_variant_new("()"),
|
|
|
|
|
G_VARIANT_TYPE("(ss)"),
|
|
|
|
|
G_DBUS_CALL_FLAGS_NONE,
|
|
|
|
|
NM_DBUS_DEFAULT_TIMEOUT_MSEC,
|
|
|
|
|
TRUE,
|
|
|
|
|
error);
|
|
|
|
|
if (!ret)
|
2014-09-18 16:11:00 -04:00
|
|
|
return FALSE;
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-04 16:18:51 +02:00
|
|
|
g_variant_get(ret, "(ss)", level, domains);
|
|
|
|
|
return TRUE;
|
2014-07-24 08:53:33 -04:00
|
|
|
}
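For reference, a short usage sketch of the synchronous getter whose body
appears above, assuming (as in libnm) that the surrounding function is the
public, since-1.22 deprecated nm_client_get_logging(). The out strings are
duplicated by g_variant_get() and must be freed by the caller.
```c
#include <NetworkManager.h>

/* Illustrative only: print the daemon's current logging configuration. */
static void
print_logging_state(NMClient *client)
{
    char   *level   = NULL;
    char   *domains = NULL;
    GError *error   = NULL;

    if (!nm_client_get_logging(client, &level, &domains, &error)) {
        g_warning("GetLogging failed: %s", error->message);
        g_clear_error(&error);
        return;
    }
    g_print("level=%s domains=%s\n", level, domains);
    g_free(level);
    g_free(domains);
}
```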
|
|
|
|
|
|
|
|
|
|
/**
|
2014-09-18 16:11:00 -04:00
|
|
|
* nm_client_set_logging:
|
2014-07-24 08:53:33 -04:00
|
|
|
* @client: a #NMClient
|
2023-03-01 01:21:38 +01:00
|
|
|
* @level: (nullable): logging level to set (%NULL or an empty string for no change)
|
|
|
|
|
* @domains: (nullable): logging domains to set. The string should be a list of log
|
2014-09-18 16:11:00 -04:00
|
|
|
* domains separated by ",". (%NULL or an empty string for no change)
|
2023-03-01 01:21:38 +01:00
|
|
|
* @error: return location for a #GError, or %NULL
|
2014-07-24 08:53:33 -04:00
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Sets NetworkManager logging level and/or domains.
|
2014-07-24 08:53:33 -04:00
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Returns: %TRUE on success, %FALSE otherwise
|
2019-10-04 16:48:39 +02:00
|
|
|
*
|
libnm: advise using D-Bus instead of deprecated synchronous methods
With 1.22, various synchronous functions for invoking D-Bus methods were
deprecated. The reason was that D-Bus is fundamentally asynchronous, and
providing synchronous API in NMClient is inherently wrong. That is
because NMClient essentially is a cache of the D-Bus API, and invoking
g_dbus_connection_call_sync() messes up the order of events from D-Bus.
In particular, when the synchronous function completes, the content of
the cache does not yet reflect the change.
Since they got deprecated, the question is what to replace them with.
Instead of adding, for example, an nm_client_networking_set_enabled_async()
counterpart to nm_client_networking_set_enabled(), just expect the user to
call D-Bus directly.
D-Bus itself defines a reasonable API, and with GDBusConnection it
is fine (and convenient) to just call D-Bus operations directly.
Often libraries try to abstract D-Bus by providing convenience
wrappers around D-Bus API. I think that is often wrong and unnecessary.
Note that libnm's NMClient does a lot more than just wrapping simple
D-Bus calls. It provides a complete client-side cache of the D-Bus
interface. As such, what libnm's NMClient does goes beyond simple
wrappers around D-Bus, and it is a reasonable thing to provide.
However, it is unnecessary to add API like nm_client_networking_set_enabled_async()
that only calls g_dbus_connection_call(). Don't pretend that we would need such
trivial wrappers in libnm.
Instead, recommend using g_dbus_connection_call(), or alternatively
the convenience wrappers nm_client_dbus_call() and
nm_client_dbus_set_property().
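As a sketch of that recommendation applied to the deprecated
nm_client_set_logging() below, the same "SetLogging" D-Bus method can be
invoked through the generic async helper nm_client_dbus_call() (available
since 1.24); the wrapper and callback names here are illustrative.
```c
#include <NetworkManager.h>

static void
set_logging_done(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GVariant *ret;
    GError   *error = NULL;

    ret = nm_client_dbus_call_finish(NM_CLIENT(source), result, &error);
    if (!ret) {
        g_warning("SetLogging failed: %s", error->message);
        g_clear_error(&error);
        return;
    }
    g_variant_unref(ret);
}

static void
set_logging_async(NMClient *client, const char *level, const char *domains)
{
    nm_client_dbus_call(client,
                        NM_DBUS_PATH,
                        NM_DBUS_INTERFACE,
                        "SetLogging",
                        g_variant_new("(ss)", level ?: "", domains ?: ""),
                        G_VARIANT_TYPE("()"),
                        -1, /* default D-Bus timeout */
                        NULL,
                        set_logging_done,
                        NULL);
}
```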
2020-03-13 18:24:37 +01:00
|
|
|
* Deprecated: 1.22: Use the async method nm_client_dbus_call() on %NM_DBUS_PATH,
|
|
|
|
|
* %NM_DBUS_INTERFACE to call "SetLogging" with "(ss)" arguments for level and domains.
|
2014-07-24 08:53:33 -04:00
|
|
|
**/
|
|
|
|
|
gboolean
|
2014-09-18 16:11:00 -04:00
|
|
|
nm_client_set_logging(NMClient *client, const char *level, const char *domains, GError **error)
|
2014-07-24 08:53:33 -04:00
|
|
|
{
|
|
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
|
2014-09-18 16:11:00 -04:00
|
|
|
g_return_val_if_fail(error == NULL || *error == NULL, FALSE);
|
2014-07-24 08:53:33 -04:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
|
|
|
return _nm_client_dbus_call_sync_void(client,
|
2019-10-04 16:18:51 +02:00
|
|
|
NULL,
|
|
|
|
|
NM_DBUS_PATH,
|
|
|
|
|
NM_DBUS_INTERFACE,
|
|
|
|
|
"SetLogging",
|
|
|
|
|
g_variant_new("(ss)", level ?: "", domains ?: ""),
|
|
|
|
|
G_DBUS_CALL_FLAGS_NONE,
|
|
|
|
|
NM_DBUS_DEFAULT_TIMEOUT_MSEC,
|
|
|
|
|
TRUE,
|
|
|
|
|
error);
|
2014-07-24 08:53:33 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
2014-09-18 16:11:00 -04:00
|
|
|
* nm_client_get_permission_result:
|
2014-07-24 08:53:33 -04:00
|
|
|
* @client: a #NMClient
|
2014-09-18 16:11:00 -04:00
|
|
|
* @permission: the permission for which to return the result, one of #NMClientPermission
|
2014-07-24 08:53:33 -04:00
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Requests the result of a specific permission, which indicates whether the
|
|
|
|
|
* client can or cannot perform the action the permission represents.
|
2014-07-24 08:53:33 -04:00
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Returns: the permission's result, one of #NMClientPermissionResult
|
2014-07-24 08:53:33 -04:00
|
|
|
**/
|
2014-09-18 16:11:00 -04:00
|
|
|
NMClientPermissionResult
|
|
|
|
|
nm_client_get_permission_result(NMClient *client, NMClientPermission permission)
|
2014-07-24 08:53:33 -04:00
|
|
|
{
|
2021-11-09 13:28:54 +01:00
|
|
|
NMClientPrivate *priv;
|
2019-12-06 11:48:04 +01:00
|
|
|
NMClientPermissionResult result = NM_CLIENT_PERMISSION_RESULT_UNKNOWN;
|
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
|
|
|
|
2014-09-18 16:11:00 -04:00
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), NM_CLIENT_PERMISSION_RESULT_UNKNOWN);
|
|
|
|
|
|
2019-12-06 11:48:04 +01:00
|
|
|
if (permission > NM_CLIENT_PERMISSION_NONE && permission <= NM_CLIENT_PERMISSION_LAST) {
|
|
|
|
|
priv = NM_CLIENT_GET_PRIVATE(client);
|
|
|
|
|
if (priv->permissions)
|
|
|
|
|
result = priv->permissions[permission - 1];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return result;
|
2014-07-24 08:53:33 -04:00
|
|
|
}
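For illustration (not from the original source), a caller might gate an
action on one cached permission; %NM_CLIENT_PERMISSION_NETWORK_CONTROL is one
of the defined #NMClientPermission values.
```c
#include <NetworkManager.h>

/* Returns TRUE if the user may control networking, either outright (YES)
 * or after authentication (AUTH). */
static gboolean
can_control_network(NMClient *client)
{
    NMClientPermissionResult res;

    res = nm_client_get_permission_result(client,
                                          NM_CLIENT_PERMISSION_NETWORK_CONTROL);
    return res == NM_CLIENT_PERMISSION_RESULT_YES
           || res == NM_CLIENT_PERMISSION_RESULT_AUTH;
}
```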
|
|
|
|
|
|
2019-12-05 15:53:51 +01:00
|
|
|
/**
|
|
|
|
|
* nm_client_get_permissions_state:
|
|
|
|
|
* @self: the #NMClient instance
|
|
|
|
|
*
|
|
|
|
|
* Returns: the state of the cached permissions. %NM_TERNARY_DEFAULT
|
|
|
|
|
* means that no permissions result has been received yet. All permissions
|
|
|
|
|
* are unknown. %NM_TERNARY_TRUE means that the permissions got received
|
|
|
|
|
* and are cached. %%NM_TERNARY_FALSE means that permissions are cached,
|
2020-07-04 11:37:01 +03:00
|
|
|
* but they were invalidated because a "CheckPermissions" signal was received
|
2019-12-05 15:53:51 +01:00
|
|
|
* in the meantime.
|
|
|
|
|
*
|
|
|
|
|
* Since: 1.24
|
|
|
|
|
*/
|
|
|
|
|
NMTernary
|
|
|
|
|
nm_client_get_permissions_state(NMClient *self)
|
|
|
|
|
{
|
|
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(self), NM_TERNARY_DEFAULT);
|
|
|
|
|
|
|
|
|
|
return NM_CLIENT_GET_PRIVATE(self)->permissions_state;
|
|
|
|
|
}
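A small illustrative helper (hypothetical name, not from the original source)
showing how the three states returned above could be interpreted before
trusting individual nm_client_get_permission_result() values.
```c
#include <NetworkManager.h>

static void
log_permissions_freshness(NMClient *client)
{
    switch (nm_client_get_permissions_state(client)) {
    case NM_TERNARY_DEFAULT:
        g_print("no permissions received yet; all results are UNKNOWN\n");
        break;
    case NM_TERNARY_TRUE:
        g_print("permissions are cached and up to date\n");
        break;
    case NM_TERNARY_FALSE:
        g_print("permissions are cached but stale (\"CheckPermissions\" was seen)\n");
        break;
    }
}
```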
|
|
|
|
|
|
2014-07-24 08:53:33 -04:00
|
|
|
/**
|
2014-09-18 16:11:00 -04:00
|
|
|
* nm_client_get_connectivity:
|
|
|
|
|
* @client: an #NMClient
|
2014-07-24 08:53:33 -04:00
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Gets the current network connectivity state. Contrast
|
|
|
|
|
* nm_client_check_connectivity() and
|
|
|
|
|
* nm_client_check_connectivity_async(), which re-check the
|
|
|
|
|
* connectivity state before returning any information.
|
2014-07-24 08:53:33 -04:00
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Returns: the current connectivity state
|
|
|
|
|
*/
|
|
|
|
|
NMConnectivityState
|
|
|
|
|
nm_client_get_connectivity(NMClient *client)
|
2014-07-24 08:53:33 -04:00
|
|
|
{
|
2015-03-18 18:08:24 +01:00
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), NM_CONNECTIVITY_UNKNOWN);
|
2014-07-24 08:53:33 -04:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    return NM_CLIENT_GET_PRIVATE(client)->nm.connectivity;
}

/**
 * nm_client_check_connectivity:
 * @client: an #NMClient
 * @cancellable: a #GCancellable
 * @error: return location for a #GError
 *
 * Updates the network connectivity state and returns the (new)
 * current state. Contrast nm_client_get_connectivity(), which returns
 * the most recent known state without re-checking.
 *
 * This is a blocking call; use nm_client_check_connectivity_async()
 * if you do not want to block.
 *
 * Returns: the (new) current connectivity state
 *
 * Deprecated: 1.22: Use nm_client_check_connectivity_async() or GDBusConnection.
 */
libnm: deprecate synchronous/blocking API in libnm

Note that D-Bus is fundamentally asynchronous. Doing blocking calls
on top of D-Bus is odd, especially for libnm's NMClient. That is because
NMClient essentially is a client-side cache of the objects from the D-Bus
interface. This cache should be filled exclusively by (asynchronous) D-Bus
events (PropertiesChanged). So, making a blocking D-Bus call means to wait
for a response and return it, while queuing all messages that are received
in the meantime.

Basically, there are three ways a synchronous API on NMClient could behave:

1) the call just calls g_dbus_connection_call_sync(). This means
that libnm sends a D-Bus request via GDBusConnection and blockingly
waits for the response. All D-Bus messages that get received in the
meantime are queued in the GMainContext that belongs to NMClient.
That means none of these D-Bus events are processed until we
iterate the GMainContext after the call returns. The effect is
that NMClient (and all cached objects in there) is unaffected by
the D-Bus request.
Most of the synchronous API calls in libnm are of this kind.
The problem is that the strict ordering of D-Bus events gets
violated.
For some API this is not an immediate problem. Take for example
nm_device_wifi_request_scan(). The call merely blockingly tells
NetworkManager to start scanning, but since NetworkManager's D-Bus
API does not directly expose any state that tells whether we are
currently scanning, this out-of-order processing of the D-Bus
request is a small issue.
The problem is more obvious for nm_client_networking_set_enabled().
After calling it, NM_CLIENT_NETWORKING_ENABLED is still unaffected
and unchanged, because the PropertiesChanged signal from D-Bus
is not yet processed.
This means, while you make such a blocking call, NMClient's state
does not change. But usually you perform the synchronous call
to change some state. In this form, the blocking call is not useful,
because NMClient only changes the state after iterating the GMainContext,
and not after the blocking call returns.

2) like 1), but after making the blocking g_dbus_connection_call_sync(),
update the NMClient cache artificially. This is what
nm_manager_check_connectivity() does, to "fix" bgo#784629.
This also has the problem of out-of-order events, but it kinda
solves the problem of not changing the state during the blocking
call. But it does so by hacking the state of the cache. I think
this is really wrong, because the state should only be updated from
the ordered stream of D-Bus messages (PropertiesChanged signal and
similar). When libnm decides to modify the state, there may already be
D-Bus messages queued that affect this very state.

3) instead of calling g_dbus_connection_call_sync(), use the
asynchronous g_dbus_connection_call(). If we used a separate
GMainContext for all D-Bus related calls, we could ensure that
while we block for the response, we iterate that internal main context.
This might be nice, because all events are processed in order and
after the blocking call returns, the NMClient state is up to date.
There are problems, however: the current blocking API does not do this,
so it would be a significant change in behavior. Also, it might be
unexpected to the user that during the blocking call the entire
content of NMClient's cache might change and all pointers to the
cache might be invalidated. Also, of course, NMClient would invoke
signals for all the changes that happen.
Another problem is that this would be more effort to implement,
and it involves a small performance overhead for all D-Bus related
calls (because we have to serialize all events in an internal
GMainContext first and then invoke them on the caller's context).
Also, if users want this behavior, they can implement it themselves
by running libnm in their own GMainContext. Note that libnm might
have bugs that prevent that from fully working, but those should be
fixed instead of adding such synchronous API behavior.

Read also [1] for why blocking calls are wrong.
[1] https://smcv.pseudorandom.co.uk/2008/11/nonblocking/

So, all possible behaviors for synchronous API have severe behavioral
issues. Mark all this API as deprecated. Also, this serves the purpose of
identifying blocking D-Bus calls in libnm.

Note that "deprecated" here does not really mean that the API is going
to be removed. We don't break API. The user may:
- continue to use this API. It's deprecated, awkward and discouraged,
but if it works, by all means use it.
- use asynchronous API. That's the only sensible way to use D-Bus.
If libnm lacks a certain asynchronous counterpart, it should be
added (see the sketch after this message).
- use GDBusConnection directly. There really isn't anything wrong
with D-Bus or GDBusConnection. This deprecated API is just a wrapper
around g_dbus_connection_call_sync(). You may call it directly
without feeling dirty.
---
The only other remaining synchronous API is the GInitable call for
NMClient. That is an entirely separate beast and not particularly
wrong (from an API point of view).

Note that the synchronous API in NMSecretAgentOld, NMVpnPluginOld and
NMVpnServicePlugin is not deprecated here. These types are not part
of the D-Bus cache, and while they have similar issues, it's less severe
because they have less state.
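As a sketch of that recommended asynchronous pattern:
nm_client_check_connectivity_async() and nm_client_check_connectivity_finish()
are the real libnm calls; the main-loop scaffolding around them is
illustrative only.
```
/* Sketch: check connectivity without blocking. The libnm calls are
 * real API; names and scaffolding are illustrative. */
#include <NetworkManager.h>

static void
check_done(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GMainLoop          *loop  = user_data;
    GError             *error = NULL;
    NMConnectivityState state;

    state = nm_client_check_connectivity_finish(NM_CLIENT(source), result, &error);
    if (error) {
        g_printerr("connectivity check failed: %s\n", error->message);
        g_error_free(error);
    } else
        g_print("connectivity state: %d\n", (int) state);
    g_main_loop_quit(loop);
}

int
main(void)
{
    GError    *error = NULL;
    GMainLoop *loop;
    NMClient  *client;

    client = nm_client_new(NULL, &error);
    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    loop = g_main_loop_new(NULL, FALSE);
    nm_client_check_connectivity_async(client, NULL, check_done, loop);
    g_main_loop_run(loop); /* check_done() quits the loop */

    g_main_loop_unref(loop);
    g_object_unref(client);
    return 0;
}
```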
NMConnectivityState
nm_client_check_connectivity(NMClient *client, GCancellable *cancellable, GError **error)
{
    NMClientPrivate           *priv;
    gs_unref_variant GVariant *ret = NULL;
    guint32                    connectivity;

    g_return_val_if_fail(NM_IS_CLIENT(client), NM_CONNECTIVITY_UNKNOWN);

    ret = _nm_client_dbus_call_sync(client,
                                    cancellable,
                                    NM_DBUS_PATH,
                                    NM_DBUS_INTERFACE,
                                    "CheckConnectivity",
                                    g_variant_new("()"),
                                    G_VARIANT_TYPE("(u)"),
                                    G_DBUS_CALL_FLAGS_NONE,
                                    NM_DBUS_DEFAULT_TIMEOUT_MSEC,
                                    TRUE,
                                    error);
    if (!ret)
        return NM_CONNECTIVITY_UNKNOWN;

    g_variant_get(ret, "(u)", &connectivity);

    /* upon receiving the synchronous response, we hack the NMClient state
     * and update the property outside the ordered D-Bus messages (like
     * "PropertiesChanged" signals).
     *
     * This is really ugly, we shouldn't do this. */
    priv = NM_CLIENT_GET_PRIVATE(client);

    if (priv->nm.connectivity != connectivity) {
        priv->nm.connectivity = connectivity;
        _notify(client, PROP_CONNECTIVITY);
    }

    return connectivity;
}

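For contrast, calling the blocking variant defined above looks like this.
A sketch only; the call is real API but deprecated since 1.22, so prefer
the asynchronous version.
```
/* Sketch: the deprecated blocking variant. Acceptable for one-shot
 * tools without a main loop; discouraged elsewhere. */
#include <NetworkManager.h>

int
main(void)
{
    GError   *error  = NULL;
    NMClient *client = nm_client_new(NULL, &error);

    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    g_print("connectivity: %d\n",
            (int) nm_client_check_connectivity(client, NULL, &error));
    g_clear_error(&error);

    g_object_unref(client);
    return 0;
}
```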
/**
 * nm_client_check_connectivity_async:
 * @client: an #NMClient
 * @cancellable: a #GCancellable
 * @callback: callback to call with the result
 * @user_data: data for @callback.
 *
 * Asynchronously updates the network connectivity state and invokes
 * @callback when complete. Contrast nm_client_get_connectivity(),
 * which (immediately) returns the most recent known state without
 * re-checking, and nm_client_check_connectivity(), which blocks.
 */
void
nm_client_check_connectivity_async(NMClient           *client,
                                   GCancellable       *cancellable,
                                   GAsyncReadyCallback callback,
                                   gpointer            user_data)
{
    g_return_if_fail(NM_IS_CLIENT(client));
    g_return_if_fail(!cancellable || G_IS_CANCELLABLE(cancellable));

2019-10-30 11:42:58 +01:00
|
|
|
_nm_client_dbus_call(client,
|
|
|
|
|
client,
|
2019-10-04 17:42:32 +02:00
|
|
|
nm_client_check_connectivity_async,
|
|
|
|
|
cancellable,
|
|
|
|
|
callback,
|
|
|
|
|
user_data,
|
|
|
|
|
NM_DBUS_PATH,
|
|
|
|
|
NM_DBUS_INTERFACE,
|
|
|
|
|
"CheckConnectivity",
|
|
|
|
|
g_variant_new("()"),
|
|
|
|
|
G_VARIANT_TYPE("(u)"),
|
|
|
|
|
G_DBUS_CALL_FLAGS_NONE,
|
|
|
|
|
NM_DBUS_DEFAULT_TIMEOUT_MSEC,
|
|
|
|
|
nm_dbus_connection_call_finish_variant_strip_dbus_error_cb);
|
2014-09-18 16:11:00 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* nm_client_check_connectivity_finish:
|
|
|
|
|
* @client: an #NMClient
|
|
|
|
|
* @result: the #GAsyncResult
|
|
|
|
|
* @error: return location for a #GError
|
2014-07-24 08:53:33 -04:00
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Retrieves the result of an nm_client_check_connectivity_async()
|
|
|
|
|
* call.
|
|
|
|
|
*
|
|
|
|
|
* Returns: the (new) current connectivity state
|
|
|
|
|
*/
|
|
|
|
|
NMConnectivityState
|
|
|
|
|
nm_client_check_connectivity_finish(NMClient *client, GAsyncResult *result, GError **error)
|
2014-07-24 08:53:33 -04:00
|
|
|
{
|
2019-10-04 17:42:32 +02:00
|
|
|
gs_unref_variant GVariant *ret = NULL;
|
|
|
|
|
guint32 connectivity;
|
2014-07-24 08:53:33 -04:00
|
|
|
|
2014-10-07 09:26:40 -04:00
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), NM_CONNECTIVITY_UNKNOWN);
|
2020-06-25 14:03:57 +02:00
|
|
|
g_return_val_if_fail(nm_g_task_is_valid(result, client, nm_client_check_connectivity_async),
|
|
|
|
|
NM_CONNECTIVITY_UNKNOWN);
|
2014-09-18 16:11:00 -04:00
|
|
|
|
2019-10-04 17:42:32 +02:00
|
|
|
ret = g_task_propagate_pointer(G_TASK(result), error);
|
|
|
|
|
if (!ret)
|
2014-09-18 16:11:00 -04:00
|
|
|
return NM_CONNECTIVITY_UNKNOWN;
|
2019-10-04 17:42:32 +02:00
|
|
|
|
|
|
|
|
g_variant_get(ret, "(u)", &connectivity);
|
|
|
|
|
return connectivity;
|
2014-07-24 08:53:33 -04:00
|
|
|
}
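The async/finish pair above follows the usual GAsyncResult pattern. As a
minimal caller-side sketch (assuming an initialized NMClient *client whose
GMainContext is being iterated; the callback name is illustrative only):
```
static void
connectivity_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError             *error = NULL;
    NMConnectivityState state;

    /* Propagates the "(u)" reply unpacked by the finish function above. */
    state = nm_client_check_connectivity_finish(NM_CLIENT(source), result, &error);
    if (error) {
        g_printerr("CheckConnectivity failed: %s\n", error->message);
        g_clear_error(&error);
        return;
    }
    g_print("connectivity state: %d\n", (int) state);
}

/* Kick off the request; the callback runs on the client's GMainContext:
 *   nm_client_check_connectivity_async(client, NULL, connectivity_cb, NULL);
 */
```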
|
|
|
|
|
|
2014-09-29 10:58:16 -04:00
|
|
|
/**
|
|
|
|
|
* nm_client_save_hostname:
|
|
|
|
|
* @client: the %NMClient
|
2023-03-01 01:21:38 +01:00
|
|
|
* @hostname: (nullable): the new persistent hostname to set, or %NULL to
|
2014-09-29 10:58:16 -04:00
|
|
|
* clear any existing persistent hostname
|
|
|
|
|
* @cancellable: a #GCancellable, or %NULL
|
|
|
|
|
* @error: return location for #GError
|
|
|
|
|
*
|
|
|
|
|
* Requests that the machine's persistent hostname be set to the specified value
|
|
|
|
|
* or cleared.
|
|
|
|
|
*
|
|
|
|
|
* Returns: %TRUE if the request was successful, %FALSE if it failed
|
libnm: deprecate synchronous/blocking API in libnm
Note that D-Bus is fundamentally asynchronous. Doing blocking calls
on top of D-Bus is odd, especially for libnm's NMClient. That is because
NMClient essentially is a client-side cache of the objects from the D-Bus
interface. This cache should be filled exclusively by (asynchronous) D-Bus
events (PropertiesChanged). So, making a blocking D-Bus call means to wait
for a response and return it, while queuing all messages that are received
in the meantime.
Basically, there are three ways a synchronous API on NMClient could behave:
1) the call just calls g_dbus_connection_call_sync(). This means
that libnm sends a D-Bus request via GDBusConnection, and blockingly
waits for the response. All D-Bus messages that get received in the
meantime are queued in the GMainContext that belongs to NMClient.
That means, none of these D-Bus events are processed until we
iterate the GMainContext after the call returns. The effect is
that NMClient (and all cached objects in it) are unaffected by
the D-Bus request.
Most of the synchronous API calls in libnm are of this kind.
The problem is that the strict ordering of D-Bus events gets
violated.
For some API this is not an immediate problem. Take for example
nm_device_wifi_request_scan(). The call merely blockingly tells
NetworkManager to start scanning, but since NetworkManager's D-Bus
API does not directly expose any state that tells whether we are
currently scanning, this out-of-order processing of the D-Bus
request is a small issue.
The problem is more obvious for nm_client_networking_set_enabled().
After calling it, NM_CLIENT_NETWORKING_ENABLED is still unaffected
and unchanged, because the PropertiesChanged signal from D-Bus
is not yet processed.
This means, while you make such a blocking call, NMClient's state
does not change. But usually you perform the synchronous call
to change some state. In this form, the blocking call is not useful,
because NMClient only changes the state after iterating the GMainContext,
and not after the blocking call returns.
2) like 1), but after making the blocking g_dbus_connection_call_sync(),
update the NMClient cache artificially. This is what
nm_manager_check_connectivity() does, to "fix" bgo#784629.
This also has the problem of out-of-order events, but it kinda
solves the problem of not changing the state during the blocking
call. But it does so by hacking the state of the cache. I think
this is really wrong because the state should only be updated from
the ordered stream of D-Bus messages (PropertiesChanged signal and
similar). When libnm decides to modify the state, there may already be
D-Bus messages queued that affect this very state.
3) instead of calling g_dbus_connection_call_sync(), use the
asynchronous g_dbus_connection_call(). If we used a separate
GMainContext for all D-Bus related calls, we could ensure that
while we block for the response, we iterate that internal main context.
This might be nice, because all events are processed in order and
after the blocking call returns, the NMClient state is up to date.
There are problems, however: the current blocking API does not do this,
so it's a significant change in behavior. Also, it might be
unexpected to the user that during the blocking call the entire
content of NMClient's cache might change and all pointers to the
cache might be invalidated. Also, of course NMClient would invoke
signals for all the changes that happen.
Another problem is that this would be more effort to implement
and it involves a small performance overhead for all D-Bus related
calls (because we have to serialize all events in an internal
GMainContext first and then invoke them on the caller's context).
Also, if users want this behavior, they can implement it themselves
by running libnm in their own GMainContext. Note that libnm might
have bugs that prevent this from fully working, but those should be fixed
instead of adding such synchronous API behavior.
Read also [1], for why blocking calls are wrong.
[1] https://smcv.pseudorandom.co.uk/2008/11/nonblocking/
So, all possible behaviors for synchronous API have severe behavioral
issues. Mark all this API as deprecated. Also, this serves the purpose of
identifying blocking D-Bus calls in libnm.
Note that "deprecated" here does not really mean that the API is going
to be removed. We don't break API. The user may:
- continue to use this API. It's deprecated, awkward and discouraged,
but if it works, by all means use it.
- use asynchronous API. That's the only sensible way to use D-Bus.
If libnm lacks a certain asynchronous counterpart, it should be
added.
- use GDBusConnection directly. There really isn't anything wrong
with D-Bus or GDBusConnection. This deprecated API is just a wrapper
around g_dbus_connection_call_sync(). You may call it directly
without feeling dirty.
---
The only other remaining API is the synchronous GInitable call for
NMClient. That is an entirely separate beast and not particularly
wrong (from an API point of view).
Note that the synchronous API in NMSecretAgentOld, NMVpnPluginOld and
NMVpnServicePlugin is not deprecated here. These types are not part
of the D-Bus cache and while they have similar issues, it's less severe
because they have less state.
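To illustrate the last option above, here is a minimal sketch of issuing the
same CheckConnectivity request directly with g_dbus_connection_call_sync(),
which is all the deprecated wrapper does (it assumes a system-bus
GDBusConnection *bus obtained elsewhere, e.g. via
g_bus_get_sync(G_BUS_TYPE_SYSTEM, NULL, NULL)):
```
GVariant *ret;
guint32   connectivity = 0;
GError   *error = NULL;

/* Same method, object path and interface that the libnm wrapper targets. */
ret = g_dbus_connection_call_sync(bus,
                                  "org.freedesktop.NetworkManager",
                                  "/org/freedesktop/NetworkManager",
                                  "org.freedesktop.NetworkManager",
                                  "CheckConnectivity",
                                  NULL,
                                  G_VARIANT_TYPE("(u)"),
                                  G_DBUS_CALL_FLAGS_NONE,
                                  -1,
                                  NULL,
                                  &error);
if (ret) {
    g_variant_get(ret, "(u)", &connectivity);
    g_variant_unref(ret);
}
```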
2019-09-04 13:58:43 +02:00
|
|
|
*
|
2020-03-13 19:45:09 +01:00
|
|
|
* Deprecated: 1.22: Use nm_client_save_hostname_async() or GDBusConnection.
|
2014-09-29 10:58:16 -04:00
|
|
|
**/
|
|
|
|
|
gboolean
|
2021-11-09 13:28:54 +01:00
|
|
|
nm_client_save_hostname(NMClient *client,
|
|
|
|
|
const char *hostname,
|
2014-09-29 10:58:16 -04:00
|
|
|
GCancellable *cancellable,
|
2021-11-09 13:28:54 +01:00
|
|
|
GError **error)
|
2014-09-29 10:58:16 -04:00
|
|
|
{
|
|
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
|
2019-10-07 09:39:34 +02:00
|
|
|
g_return_val_if_fail(!cancellable || G_IS_CANCELLABLE(cancellable), FALSE);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
|
|
|
return _nm_client_dbus_call_sync_void(client,
|
2019-10-07 09:39:34 +02:00
|
|
|
cancellable,
|
|
|
|
|
NM_DBUS_PATH_SETTINGS,
|
|
|
|
|
NM_DBUS_INTERFACE_SETTINGS,
|
|
|
|
|
"SaveHostname",
|
|
|
|
|
g_variant_new("(s)", hostname ?: ""),
|
|
|
|
|
G_DBUS_CALL_FLAGS_NONE,
|
|
|
|
|
NM_DBUS_DEFAULT_TIMEOUT_MSEC,
|
|
|
|
|
TRUE,
|
|
|
|
|
error);
|
2014-09-29 10:58:16 -04:00
|
|
|
}
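A minimal usage sketch of the blocking wrapper above (it assumes an
initialized NMClient *client; as noted, the call is deprecated since 1.22
and the async variant below is preferred):
```
GError *error = NULL;

if (!nm_client_save_hostname(client, "new-hostname", NULL, &error)) {
    g_printerr("SaveHostname failed: %s\n", error->message);
    g_clear_error(&error);
}

/* Passing NULL as the hostname clears any persistent hostname. */
nm_client_save_hostname(client, NULL, NULL, NULL);
```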
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* nm_client_save_hostname_async:
|
|
|
|
|
* @client: the %NMClient
|
2023-03-01 01:21:38 +01:00
|
|
|
* @hostname: (nullable): the new persistent hostname to set, or %NULL to
|
2014-09-29 10:58:16 -04:00
|
|
|
* clear any existing persistent hostname
|
|
|
|
|
* @cancellable: a #GCancellable, or %NULL
|
2024-11-27 12:51:24 +01:00
|
|
|
* @callback: (scope async) (closure user_data): callback to be called when the operation completes
|
|
|
|
|
* @user_data: caller-specific data passed to @callback
|
2014-09-29 10:58:16 -04:00
|
|
|
*
|
|
|
|
|
* Requests that the machine's persistent hostname be set to the specified value
|
|
|
|
|
* or cleared.
|
|
|
|
|
**/
|
|
|
|
|
void
|
2021-11-09 13:28:54 +01:00
|
|
|
nm_client_save_hostname_async(NMClient *client,
|
|
|
|
|
const char *hostname,
|
|
|
|
|
GCancellable *cancellable,
|
2014-09-29 10:58:16 -04:00
|
|
|
GAsyncReadyCallback callback,
|
|
|
|
|
gpointer user_data)
|
|
|
|
|
{
|
|
|
|
|
g_return_if_fail(NM_IS_CLIENT(client));
|
2019-10-07 09:43:47 +02:00
|
|
|
g_return_if_fail(!cancellable || G_IS_CANCELLABLE(cancellable));
|
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
|
|
|
_nm_client_dbus_call(client,
|
|
|
|
|
client,
|
2019-10-07 09:43:47 +02:00
|
|
|
nm_client_save_hostname_async,
|
|
|
|
|
cancellable,
|
|
|
|
|
callback,
|
|
|
|
|
user_data,
|
|
|
|
|
NM_DBUS_PATH_SETTINGS,
|
|
|
|
|
NM_DBUS_INTERFACE_SETTINGS,
|
|
|
|
|
"SaveHostname",
|
|
|
|
|
g_variant_new("(s)", hostname ?: ""),
|
|
|
|
|
G_VARIANT_TYPE("()"),
|
|
|
|
|
G_DBUS_CALL_FLAGS_NONE,
|
|
|
|
|
NM_DBUS_DEFAULT_TIMEOUT_MSEC,
|
|
|
|
|
nm_dbus_connection_call_finish_void_strip_dbus_error_cb);
|
2014-09-29 10:58:16 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* nm_client_save_hostname_finish:
|
|
|
|
|
* @client: the %NMClient
|
|
|
|
|
* @result: the result passed to the #GAsyncReadyCallback
|
|
|
|
|
* @error: return location for #GError
|
|
|
|
|
*
|
|
|
|
|
* Gets the result of an nm_client_save_hostname_async() call.
|
|
|
|
|
*
|
|
|
|
|
* Returns: %TRUE if the request was successful, %FALSE if it failed
|
|
|
|
|
**/
|
|
|
|
|
gboolean
|
|
|
|
|
nm_client_save_hostname_finish(NMClient *client, GAsyncResult *result, GError **error)
|
|
|
|
|
{
|
|
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
|
2019-10-07 09:43:47 +02:00
|
|
|
g_return_val_if_fail(nm_g_task_is_valid(result, client, nm_client_save_hostname_async), FALSE);
|
2014-09-29 10:58:16 -04:00
|
|
|
|
2019-10-07 09:43:47 +02:00
|
|
|
return g_task_propagate_boolean(G_TASK(result), error);
|
2014-09-29 10:58:16 -04:00
|
|
|
}
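Putting the async/finish pair together, a minimal caller-side sketch
(assuming an initialized NMClient *client; the callback name is
illustrative only):
```
static void
hostname_saved_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError *error = NULL;

    if (!nm_client_save_hostname_finish(NM_CLIENT(source), result, &error)) {
        g_printerr("SaveHostname failed: %s\n", error->message);
        g_clear_error(&error);
    }
}

/* Start the request; completion is reported on the client's GMainContext:
 *   nm_client_save_hostname_async(client, "new-hostname", NULL,
 *                                 hostname_saved_cb, NULL);
 */
```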
|
|
|
|
|
|
2016-10-02 18:22:50 +02:00
|
|
|
/*****************************************************************************/
|
2019-10-07 09:43:47 +02:00
|
|
|
/* Devices */
|
2016-10-02 18:22:50 +02:00
|
|
|
/*****************************************************************************/
|
2014-09-18 16:11:00 -04:00
|
|
|
|
2014-07-24 08:53:33 -04:00
|
|
|
/**
|
2014-09-18 16:11:00 -04:00
|
|
|
* nm_client_get_devices:
|
2014-07-24 08:53:33 -04:00
|
|
|
* @client: a #NMClient
|
|
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Gets all the known network devices. Use nm_device_get_type() or the
|
2014-12-18 12:44:57 -05:00
|
|
|
* <literal>NM_IS_DEVICE_XXXX</literal> functions to determine what kind of
|
|
|
|
|
* device each member of the returned array is, and then you may use device-specific
|
|
|
|
|
* methods such as nm_device_ethernet_get_hw_address().
|
2014-07-24 08:53:33 -04:00
|
|
|
*
|
2014-09-18 16:11:00 -04:00
|
|
|
* Returns: (transfer none) (element-type NMDevice): a #GPtrArray
|
|
|
|
|
* containing all the #NMDevices. The returned array is owned by the
|
|
|
|
|
* #NMClient object and should not be modified.
|
2014-07-24 08:53:33 -04:00
|
|
|
**/
|
2014-09-18 16:11:00 -04:00
|
|
|
const GPtrArray *
|
|
|
|
|
nm_client_get_devices(NMClient *client)
|
2014-07-24 08:53:33 -04:00
|
|
|
{
|
2014-09-18 16:11:00 -04:00
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), NULL);
|
2014-07-24 08:53:33 -04:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    return nml_dbus_property_ao_get_objs_as_ptrarray(
        &NM_CLIENT_GET_PRIVATE(client)->nm.property_ao[PROPERTY_AO_IDX_DEVICES]);
}

/**
 * nm_client_get_all_devices:
 * @client: a #NMClient
 *
 * Gets both real devices and device placeholders (e.g., software devices which
 * do not currently exist, but could be created automatically by NetworkManager
 * if one of their NMDevice::ActivatableConnections was activated). Use
 * nm_device_is_real() to determine whether each device is a real device or
 * a placeholder.
 *
 * Use nm_device_get_type() or the NM_IS_DEVICE_XXXX() functions to determine
 * what kind of device each member of the returned array is, and then you may
 * use device-specific methods such as nm_device_ethernet_get_hw_address().
 *
 * Returns: (transfer none) (element-type NMDevice): a #GPtrArray
 * containing all the #NMDevices. The returned array is owned by the
 * #NMClient object and should not be modified.
 *
 * Since: 1.2
 **/
const GPtrArray *
nm_client_get_all_devices(NMClient *client)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);

    return nml_dbus_property_ao_get_objs_as_ptrarray(
        &NM_CLIENT_GET_PRIVATE(client)->nm.property_ao[PROPERTY_AO_IDX_ALL_DEVICES]);
}
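/* Illustrative usage sketch (not part of libnm): how a caller might walk the
 * list returned by nm_client_get_all_devices() and use nm_device_is_real(),
 * as the documentation above describes, to tell real devices from
 * placeholders:
 *
 *     const GPtrArray *all = nm_client_get_all_devices(client);
 *     guint            i;
 *
 *     for (i = 0; i < all->len; i++) {
 *         NMDevice *device = g_ptr_array_index(all, i);
 *
 *         g_print("%s: %s\n",
 *                 nm_device_get_iface(device),
 *                 nm_device_is_real(device) ? "real" : "placeholder");
 *     }
 */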

/**
 * nm_client_get_device_by_path:
 * @client: a #NMClient
 * @object_path: the object path to search for
 *
 * Gets a #NMDevice from a #NMClient.
 *
 * Returns: (transfer none): the #NMDevice for the given @object_path or %NULL if none is found.
 **/
NMDevice *
nm_client_get_device_by_path(NMClient *client, const char *object_path)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);
    g_return_val_if_fail(object_path, NULL);

    return _dbobjs_get_nmobj_unpack_visible(client, object_path, NM_TYPE_DEVICE);
}
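/* Illustrative usage sketch (not part of libnm): looking up the #NMDevice
 * behind a D-Bus object path, e.g. one obtained from an active connection.
 * The object path used here is hypothetical:
 *
 *     NMDevice *device;
 *
 *     device = nm_client_get_device_by_path(client,
 *                                           "/org/freedesktop/NetworkManager/Devices/1");
 *     if (device)
 *         g_print("found device: %s\n", nm_device_get_iface(device));
 */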

/**
 * nm_client_get_device_by_iface:
 * @client: a #NMClient
 * @iface: the interface name to search for
 *
 * Gets a #NMDevice from a #NMClient.
 *
 * Returns: (transfer none): the #NMDevice for the given @iface or %NULL if none is found.
 **/
NMDevice *
nm_client_get_device_by_iface(NMClient *client, const char *iface)
{
    const GPtrArray *devices;
    guint            i;

    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);
    g_return_val_if_fail(iface, NULL);
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Hence these are not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me (see the signal-ordering sketch
after this list).
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
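The collect-then-commit behavior above can be pictured with a minimal,
hypothetical sketch. None of these names exist in libnm (the real steps are
_dbus_handle_properties_changed(), _dbus_handle_obj_changed_dbus() and
_dbus_handle_changes_commit()); it only illustrates queueing notifications
while the instances are updated and flushing them once the cache is
consistent:
```
/* Hypothetical sketch, not libnm code: queue "notify" emissions while
 * applying changes, flush them only when the cache is consistent. */
#include <glib-object.h>

typedef struct {
    GObject *obj;
    char    *property;
} PendingNotify;

/* Steps 1+2: unpack the message and update @obj, but only record the
 * notification instead of emitting it right away. */
static void
queue_notify(GQueue *pending, GObject *obj, const char *property)
{
    PendingNotify *p;

    p = g_new(PendingNotify, 1);
    p->obj = g_object_ref(obj);
    p->property = g_strdup(property);
    g_queue_push_tail(pending, p);
}

/* Step 3: all instances are updated; now emit everything in order. */
static void
commit_changes(GQueue *pending)
{
    PendingNotify *p;

    while ((p = g_queue_pop_head(pending))) {
        g_object_notify(p->obj, p->property);
        g_object_unref(p->obj);
        g_free(p->property);
        g_free(p);
    }
}
```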
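For the signal ordering above, a hedged sketch of a consumer that would
observe the new order. The signal names are real NMClient signals; the
callbacks and the watch_devices() helper are invented for illustration:
```
#include <NetworkManager.h>

static void
on_device_removed(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("removed: %s\n", nm_device_get_iface(device));
}

static void
on_devices_changed(GObject *object, GParamSpec *pspec, gpointer user_data)
{
    g_print("device list changed\n");
}

static void
on_device_added(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("added: %s\n", nm_device_get_iface(device));
}

/* With this patch, a removal plus an addition is observed as
 * "device-removed", then "notify::devices", then "device-added". */
static void
watch_devices(NMClient *client)
{
    g_signal_connect(client, "device-removed", G_CALLBACK(on_device_removed), NULL);
    g_signal_connect(client, "notify::devices", G_CALLBACK(on_devices_changed), NULL);
    g_signal_connect(client, "device-added", G_CALLBACK(on_device_added), NULL);
}
```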
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    devices = nm_client_get_devices(client);
    for (i = 0; i < devices->len; i++) {
        NMDevice *candidate = g_ptr_array_index(devices, i);

        if (nm_streq0(nm_device_get_iface(candidate), iface))
            return candidate;
    }
    return NULL;
}
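/* Note (added commentary, not part of the original source): the lookup above
 * is a linear scan over the cached device list. nm_streq0() is the
 * NULL-tolerant string comparison, so a missing interface name compares as
 * unequal to a real one instead of crashing. */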
/*****************************************************************************/

/* Active Connections */

/*****************************************************************************/

/**
 * nm_client_get_active_connections:
 * @client: a #NMClient
 *
 * Gets the active connections.
 *
 * Returns: (transfer none) (element-type NMActiveConnection): a #GPtrArray
 * containing all the active #NMActiveConnections.
 * The returned array is owned by the client and should not be modified.
 **/
const GPtrArray *
nm_client_get_active_connections(NMClient *client)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);
    return nml_dbus_property_ao_get_objs_as_ptrarray(
        &NM_CLIENT_GET_PRIVATE(client)->nm.property_ao[PROPERTY_AO_IDX_ACTIVE_CONNECTIONS]);
}
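/* Illustrative usage (added commentary, not part of the original source):
 * the returned array is owned by @client, so it is only borrowed here.
 *
 *   const GPtrArray *acons = nm_client_get_active_connections(client);
 *   guint i;
 *
 *   for (i = 0; i < acons->len; i++) {
 *       NMActiveConnection *ac = g_ptr_array_index(acons, i);
 *
 *       g_print("active: %s\n", nm_active_connection_get_id(ac));
 *   }
 */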

/**
 * nm_client_get_primary_connection:
 * @client: an #NMClient
 *
 * Gets the #NMActiveConnection corresponding to the primary active
 * network device.
 *
 * In particular, when there is no VPN active, or the VPN does not
 * have the default route, this returns the active connection that has
 * the default route. If there is a VPN active with the default route,
 * then this function returns the active connection that contains the
 * route to the VPN endpoint.
 *
 * If there is no default route, or the default route is over a
 * non-NetworkManager-recognized device, this will return %NULL.
 *
 * Returns: (transfer none): the appropriate #NMActiveConnection, if
 * any
 */
NMActiveConnection *
nm_client_get_primary_connection(NMClient *client)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);
    return nml_dbus_property_o_get_obj(
        &NM_CLIENT_GET_PRIVATE(client)->nm.property_o[PROPERTY_O_IDX_NM_PRIMAY_CONNECTION]);
}
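/* Illustrative usage (added commentary, not part of the original source):
 * the primary connection may be NULL, e.g. when there is no default route.
 *
 *   NMActiveConnection *primary = nm_client_get_primary_connection(client);
 *
 *   if (primary)
 *       g_print("primary: %s\n", nm_active_connection_get_id(primary));
 *   else
 *       g_print("no primary connection\n");
 */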

/**
 * nm_client_get_activating_connection:
 * @client: an #NMClient
 *
 * Gets the #NMActiveConnection corresponding to a
 * currently-activating connection that is expected to become the new
 * #NMClient:primary-connection upon successful activation.
 *
 * Returns: (transfer none): the appropriate #NMActiveConnection, if
 * any.
 */
NMActiveConnection *
nm_client_get_activating_connection(NMClient *client)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);

    return nml_dbus_property_o_get_obj(
        &NM_CLIENT_GET_PRIVATE(client)->nm.property_o[PROPERTY_O_IDX_NM_ACTIVATING_CONNECTION]);
}
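
/*
 * A minimal usage sketch (not part of libnm itself) of watching the primary
 * and activating connections from a client. The example_* names are
 * hypothetical; NM_CLIENT_PRIMARY_CONNECTION and NM_CLIENT_ACTIVATING_CONNECTION
 * are the property-name macros from nm-client.h. Kept under #if 0 so it is
 * not compiled into the library.
 */
#if 0
static void
example_on_connection_changed(NMClient *client, GParamSpec *pspec, gpointer user_data)
{
    NMActiveConnection *primary    = nm_client_get_primary_connection(client);
    NMActiveConnection *activating = nm_client_get_activating_connection(client);

    /* Both getters are (transfer none): the returned objects are owned by
     * the NMClient cache and must not be unreffed by the caller. */
    g_print("primary=%s activating=%s\n",
            primary ? nm_active_connection_get_id(primary) : "(none)",
            activating ? nm_active_connection_get_id(activating) : "(none)");
}

static void
example_watch_connections(NMClient *client)
{
    g_signal_connect(client,
                     "notify::" NM_CLIENT_PRIMARY_CONNECTION,
                     G_CALLBACK(example_on_connection_changed),
                     NULL);
    g_signal_connect(client,
                     "notify::" NM_CLIENT_ACTIVATING_CONNECTION,
                     G_CALLBACK(example_on_connection_changed),
                     NULL);
}
#endif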

/*****************************************************************************/

static void
activate_connection_cb(GObject *object, GAsyncResult *result, gpointer user_data)
{
    gs_unref_object GTask *task = user_data;
    gs_unref_variant GVariant *ret = NULL;
    const char *v_active_connection;
    GError *error = NULL;

    ret = g_dbus_connection_call_finish(G_DBUS_CONNECTION(object), result, &error);
    if (!ret) {
        if (!nm_utils_error_is_cancelled(error))
            g_dbus_error_strip_remote_error(error);
        g_task_return_error(task, error);
        return;
    }

    g_variant_get(ret, "(&o)", &v_active_connection);
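    /* Note on "(&o)" above: the '&' modifier makes g_variant_get() hand out
     * a pointer into 'ret' rather than a copy, so v_active_connection is
     * only valid while 'ret' is alive (here: until the gs_unref_variant
     * cleanup runs when this function returns). */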
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    _request_wait_start(g_steal_pointer(&task),
                        "ActivateConnection",
                        NM_TYPE_ACTIVE_CONNECTION,
                        v_active_connection,
                        NULL);
}

/**
 * nm_client_activate_connection_async:
 * @client: a #NMClient
 * @connection: (nullable): an #NMConnection
 * @device: (nullable): the #NMDevice
 * @specific_object: (nullable): the object path of a connection-type-specific
 * object this activation should use. This parameter is currently ignored for
 * wired and mobile broadband connections, and the value of %NULL should be used
 * (i.e., no specific object). For Wi-Fi or WiMAX connections, pass the object
 * path of a #NMAccessPoint or #NMWimaxNsp owned by @device, which you can
 * get using nm_object_get_path(), and which will be used to complete the
 * details of the newly added connection.
 * @cancellable: a #GCancellable, or %NULL
 * @callback: callback to be called when the activation has started
 * @user_data: caller-specific data passed to @callback
 *
 * Asynchronously starts a connection to a particular network using the
 * configuration settings from @connection and the network device @device.
 * Certain connection types also take a "specific object" which is the object
 * path of a connection-specific object, like an #NMAccessPoint for Wi-Fi
 * connections, or an #NMWimaxNsp for WiMAX connections, to which you wish to
 * connect. If the specific object is not given, NetworkManager can, in some
 * cases, automatically determine which network to connect to given the settings
 * in @connection.
 *
 * If @connection is not given for a device-based activation, NetworkManager
 * picks the best available connection for the device and activates it.
 *
 * Note that the callback is invoked when NetworkManager has started activating
 * the new connection, not when it finishes. You can use the returned
 * #NMActiveConnection object (in particular, #NMActiveConnection:state) to
 * track the activation to its completion.
 **/
void
nm_client_activate_connection_async(NMClient           *client,
                                    NMConnection       *connection,
                                    NMDevice           *device,
                                    const char         *specific_object,
                                    GCancellable       *cancellable,
                                    GAsyncReadyCallback callback,
                                    gpointer            user_data)
{
    const char *arg_connection = NULL;
    const char *arg_device     = NULL;

    g_return_if_fail(NM_IS_CLIENT(client));

    if (connection) {
        g_return_if_fail(NM_IS_CONNECTION(connection));
        arg_connection = nm_connection_get_path(connection);
        g_return_if_fail(arg_connection);
    }

    if (device) {
        g_return_if_fail(NM_IS_DEVICE(device));
        arg_device = nm_object_get_path(NM_OBJECT(device));
        g_return_if_fail(arg_device);
    }

    NML_NMCLIENT_LOG_T(
        client,
        "ActivateConnection() for connection \"%s\", device \"%s\", specific_object \"%s\"",
        arg_connection ?: "/",
        arg_device ?: "/",
        specific_object ?: "/");

    _nm_client_dbus_call(
        client,
        client,
        nm_client_activate_connection_async,
        cancellable,
        callback,
        user_data,
        NM_DBUS_PATH,
        NM_DBUS_INTERFACE,
        "ActivateConnection",
        g_variant_new("(ooo)", arg_connection ?: "/", arg_device ?: "/", specific_object ?: "/"),
        G_VARIANT_TYPE("(o)"),
        G_DBUS_CALL_FLAGS_NONE,
        NM_DBUS_DEFAULT_TIMEOUT_MSEC,
        activate_connection_cb);
}

/**
 * nm_client_activate_connection_finish:
 * @client: an #NMClient
 * @result: the result passed to the #GAsyncReadyCallback
 * @error: location for a #GError, or %NULL
 *
 * Gets the result of a call to nm_client_activate_connection_async().
 *
 * Returns: (transfer full): the new #NMActiveConnection on success, %NULL on
 * failure, in which case @error will be set.
 **/
NMActiveConnection *
nm_client_activate_connection_finish(NMClient *client, GAsyncResult *result, GError **error)
{
    return NM_ACTIVE_CONNECTION(
        _request_wait_finish(client, result, nm_client_activate_connection_async, NULL, error));
}
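For completeness, a hedged usage sketch (not part of this file) pairing the
async call with the finish function; it assumes an already-initialized
NMClient and the caller's GMainLoop:
```
#include <NetworkManager.h>

/* Sketch only: activate the best available profile on @device and quit the
 * caller's main loop once NetworkManager has *started* the activation. */
static void
activated_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GMainLoop          *loop  = user_data;
    GError             *error = NULL;
    NMActiveConnection *ac;

    ac = nm_client_activate_connection_finish(NM_CLIENT(source), result, &error);
    if (!ac) {
        g_printerr("activation failed to start: %s\n", error->message);
        g_clear_error(&error);
    } else {
        /* Activation has only started here; watch #NMActiveConnection:state
         * to follow it to completion. */
        g_print("activating: %s\n", nm_object_get_path(NM_OBJECT(ac)));
        g_object_unref(ac);
    }
    g_main_loop_quit(loop);
}

static void
activate_best_on_device(NMClient *client, NMDevice *device, GMainLoop *loop)
{
    /* @connection is NULL: NetworkManager picks the best available profile.
     * @specific_object is NULL: no access point or NSP is singled out. */
    nm_client_activate_connection_async(client, NULL, device, NULL, NULL, activated_cb, loop);
}
```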
/*****************************************************************************/
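
/* Completion handler shared by the AddAndActivateConnection (v1) and
 * AddAndActivateConnection2 (v2) D-Bus calls: it finishes the call, unpacks
 * the reply, and hands the active-connection path over to
 * _request_wait_start(). */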
static void
_add_and_activate_connection_done(GObject *object,
                                  GAsyncResult *result,
                                  gboolean use_add_and_activate_v2,
                                  GTask *task_take)
{
    _nm_unused gs_unref_object GTask *task = task_take;
    gs_unref_variant GVariant *ret = NULL;
    GError *error = NULL;
    gs_unref_variant GVariant *v_result = NULL;
    const char *v_active_connection;
    const char *v_path;
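
    /* Finish the low-level D-Bus call. On failure, fail the task; the
     * remote-error prefix is stripped from the GError unless the operation
     * was merely cancelled. */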
    ret = g_dbus_connection_call_finish(G_DBUS_CONNECTION(object), result, &error);
    if (!ret) {
        if (!nm_utils_error_is_cancelled(error))
            g_dbus_error_strip_remote_error(error);
        g_task_return_error(task, error);
        return;
    }
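
    /* Per the NetworkManager D-Bus API, the v1 method replies with "(oo)"
     * (path of the new profile, path of the active connection), while the v2
     * method replies with "(ooa{sv})" and carries an extra result dictionary.
     * v_path is unpacked but not used further in this handler. */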
    if (use_add_and_activate_v2) {
        g_variant_get(ret, "(&o&o@a{sv})", &v_path, &v_active_connection, &v_result);
    } else {
        g_variant_get(ret, "(&o&o)", &v_path, &v_active_connection);
    }
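
    /* Don't complete the task yet: _request_wait_start() defers completion
     * until the NMActiveConnection at v_active_connection shows up in the
     * client's object cache, so the caller receives a usable object. */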
    _request_wait_start(g_steal_pointer(&task),
                        "AddAndActivateConnection",
                        NM_TYPE_ACTIVE_CONNECTION,
                        v_active_connection,
                        g_steal_pointer(&v_result));
}
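
/* Thin GAsyncReadyCallback adapters that record whether the v1 or the v2
 * D-Bus method was invoked. */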
static void
_add_and_activate_connection_v1_cb(GObject *object, GAsyncResult *result, gpointer user_data)
{
    _add_and_activate_connection_done(object, result, FALSE, user_data);
}

static void
_add_and_activate_connection_v2_cb(GObject *object, GAsyncResult *result, gpointer user_data)
{
    _add_and_activate_connection_done(object, result, TRUE, user_data);
}
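
/* Common worker behind nm_client_add_and_activate_connection() and
 * nm_client_add_and_activate_connection2(). @partial may be NULL or an
 * incomplete profile that NetworkManager completes based on @device and
 * @specific_object; @options is only meaningful for the v2 variant. */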
static void
_add_and_activate_connection(NMClient *self,
                             gboolean is_v2,
                             NMConnection *partial,
                             NMDevice *device,
                             const char *specific_object,
                             GVariant *options,
                             GCancellable *cancellable,
                             GAsyncReadyCallback callback,
                             gpointer user_data)
{
    GVariant *arg_connection = NULL;
    gboolean use_add_and_activate_v2 = FALSE;
    const char *arg_device = NULL;
    gpointer source_tag;
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
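For reproducibility, a minimal sketch of taking such an RSS snapshot; the
`sleep 1` settle time and the `$!` bookkeeping are illustrative, not part of
the original measurement:
```
nmcli monitor >/dev/null &
NMCLI_PID=$!
sleep 1                      # let the client populate its object cache
ps -o rss= -p "$NMCLI_PID"   # RSS in kilobytes, as in the table above
kill "$NMCLI_PID"
```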
    g_return_if_fail(NM_IS_CLIENT(self));
    g_return_if_fail(!partial || NM_IS_CONNECTION(partial));

    if (device) {
        g_return_if_fail(NM_IS_DEVICE(device));
        arg_device = nm_object_get_path(NM_OBJECT(device));
        g_return_if_fail(arg_device);
    }

    /* Serialize the (possibly partial) connection template; fall back to an
     * empty a{sa{sv}} dictionary when no template was given. */
    if (partial)
        arg_connection = nm_connection_to_dbus(partial, NM_CONNECTION_SERIALIZE_ALL);
    if (!arg_connection)
        arg_connection = nm_g_variant_singleton_aLsaLsvII();

    if (is_v2) {
        if (!options)
            options = nm_g_variant_singleton_aLsvI();
        use_add_and_activate_v2 = TRUE;
        source_tag              = nm_client_add_and_activate_connection2;
    } else {
        /* The v1 D-Bus method takes no options argument: a non-empty options
         * dictionary silently upgrades the call to the v2 method, while an
         * empty one is simply dropped. */
        if (options) {
            if (g_variant_n_children(options) > 0)
                use_add_and_activate_v2 = TRUE;
            else
                nm_clear_pointer(&options, nm_g_variant_unref_floating);
        }
        source_tag = nm_client_add_and_activate_connection_async;
    }

    NML_NMCLIENT_LOG_D(self, "AddAndActivateConnection() started...");

    if (use_add_and_activate_v2) {
        _nm_client_dbus_call(self,
                             self,
                             source_tag,
                             cancellable,
                             callback,
                             user_data,
                             NM_DBUS_PATH,
                             NM_DBUS_INTERFACE,
                             "AddAndActivateConnection2",
                             g_variant_new("(@a{sa{sv}}oo@a{sv})",
                                           arg_connection,
                                           arg_device ?: "/",
                                           specific_object ?: "/",
                                           options),
                             G_VARIANT_TYPE("(ooa{sv})"),
                             G_DBUS_CALL_FLAGS_NONE,
                             NM_DBUS_DEFAULT_TIMEOUT_MSEC,
                             _add_and_activate_connection_v2_cb);
    } else {
        _nm_client_dbus_call(self,
                             self,
                             source_tag,
                             cancellable,
                             callback,
                             user_data,
                             NM_DBUS_PATH,
                             NM_DBUS_INTERFACE,
                             "AddAndActivateConnection",
                             g_variant_new("(@a{sa{sv}}oo)",
                                           arg_connection,
                                           arg_device ?: "/",
                                           specific_object ?: "/"),
                             G_VARIANT_TYPE("(oo)"),
                             G_DBUS_CALL_FLAGS_NONE,
                             NM_DBUS_DEFAULT_TIMEOUT_MSEC,
                             _add_and_activate_connection_v1_cb);
    }
}

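As a rough sketch of what the internal _nm_client_dbus_call() helper amounts
to for the v2 branch (assuming it forwards essentially to GIO; the real helper
additionally binds the call to the client's main context and tracks the source
tag), the equivalent plain GIO call would look like:
```
/* Hypothetical GIO-level equivalent of the AddAndActivateConnection2 call
 * above; dbus_connection stands for the client's GDBusConnection. */
g_dbus_connection_call(dbus_connection,
                       "org.freedesktop.NetworkManager",  /* NM_DBUS_SERVICE */
                       "/org/freedesktop/NetworkManager", /* NM_DBUS_PATH */
                       "org.freedesktop.NetworkManager",  /* NM_DBUS_INTERFACE */
                       "AddAndActivateConnection2",
                       g_variant_new("(@a{sa{sv}}oo@a{sv})",
                                     arg_connection,
                                     arg_device ?: "/",
                                     specific_object ?: "/",
                                     options),
                       G_VARIANT_TYPE("(ooa{sv})"),
                       G_DBUS_CALL_FLAGS_NONE,
                       25000, /* assumed value of NM_DBUS_DEFAULT_TIMEOUT_MSEC */
                       cancellable,
                       callback,
                       user_data);
```
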
/**
 * nm_client_add_and_activate_connection_async:
 * @client: a #NMClient
 * @partial: (nullable): an #NMConnection to add; the connection may be
 *   partially filled (or even %NULL) and will be completed by NetworkManager
 *   using the given @device and @specific_object before being added
 * @device: (nullable): the #NMDevice
 * @specific_object: (nullable): the object path of a connection-type-specific
 *   object this activation should use. This parameter is currently ignored for
 *   wired and mobile broadband connections, and the value of %NULL should be used
 *   (i.e., no specific object). For Wi-Fi or WiMAX connections, pass the object
 *   path of a #NMAccessPoint or #NMWimaxNsp owned by @device, which you can
 *   get using nm_object_get_path(), and which will be used to complete the
 *   details of the newly added connection.
 *   If the variant is floating, it will be consumed.
 * @cancellable: a #GCancellable, or %NULL
 * @callback: callback to be called when the activation has started
 * @user_data: caller-specific data passed to @callback
 *
 * Adds a new connection using the given details (if any) as a template,
 * automatically filling in missing settings with the capabilities of the given
 * device and specific object. The new connection is then asynchronously
 * activated as with nm_client_activate_connection_async(). Cannot be used for
 * VPN connections at this time.
 *
 * Note that the callback is invoked when NetworkManager has started activating
 * the new connection, not when it finishes. You can use the returned
 * #NMActiveConnection object (in particular, #NMActiveConnection:state) to
 * track the activation to its completion.
 **/
void
nm_client_add_and_activate_connection_async(NMClient           *client,
                                            NMConnection       *partial,
                                            NMDevice           *device,
                                            const char         *specific_object,
                                            GCancellable       *cancellable,
                                            GAsyncReadyCallback callback,
                                            gpointer            user_data)
{
    _add_and_activate_connection(client,
                                 FALSE,
                                 partial,
                                 device,
                                 specific_object,
                                 NULL,
                                 cancellable,
                                 callback,
                                 user_data);
}
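
A minimal usage sketch (the callback name `on_activated` and the surrounding
error handling are illustrative, not from this file): a caller pairs the async
call with the corresponding _finish() inside the #GAsyncReadyCallback:
```
static void
on_activated(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError             *error = NULL;
    NMActiveConnection *ac;

    ac = nm_client_add_and_activate_connection_finish(NM_CLIENT(source), result, &error);
    if (!ac) {
        g_printerr("activation failed to start: %s\n", error->message);
        g_error_free(error);
        return;
    }
    /* Activation has only *started*; watch NMActiveConnection:state to
     * follow it to completion, as the documentation above notes. */
    g_object_unref(ac);
}

/* ... with client and device obtained elsewhere: */
nm_client_add_and_activate_connection_async(client,
                                            NULL, /* let NetworkManager complete the profile */
                                            device,
                                            NULL, /* no specific object */
                                            NULL, /* cancellable */
                                            on_activated,
                                            NULL);
```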

/**
 * nm_client_add_and_activate_connection_finish:
 * @client: an #NMClient
 * @result: the result passed to the #GAsyncReadyCallback
 * @error: location for a #GError, or %NULL
 *
 * Gets the result of a call to nm_client_add_and_activate_connection_async().
 *
 * You can call nm_active_connection_get_connection() on the returned
 * #NMActiveConnection to find the path of the created #NMConnection.
 *
 * Returns: (transfer full): the new #NMActiveConnection on success, %NULL on
 * failure, in which case @error will be set.
 **/
NMActiveConnection *
nm_client_add_and_activate_connection_finish(NMClient *client, GAsyncResult *result, GError **error)
{
    return NM_ACTIVE_CONNECTION(_request_wait_finish(client,
                                                     result,
                                                     nm_client_add_and_activate_connection_async,
                                                     NULL,
                                                     error));
}
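
/* Illustrative sketch (not part of libnm): a typical caller pairs
 * nm_client_add_and_activate_connection_async() with the _finish() call above
 * from its GAsyncReadyCallback. The callback name is hypothetical; the
 * callback is invoked in the GMainContext associated with the NMClient.
 */
static void
on_add_activate_done(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError             *error = NULL;
    NMActiveConnection *ac;

    ac = nm_client_add_and_activate_connection_finish(NM_CLIENT(source), result, &error);
    if (!ac) {
        g_printerr("AddAndActivate failed: %s\n", error->message);
        g_error_free(error);
        return;
    }

    /* Activation has only started at this point; watch
     * NMActiveConnection:state to follow it to completion. */
    g_object_unref(ac);
}

/* Kick it off with a %NULL partial connection to let NetworkManager complete
 * the profile from the given device:
 *
 *     nm_client_add_and_activate_connection_async(client, NULL, device, NULL,
 *                                                 NULL, on_add_activate_done, NULL);
 */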

/* Background, from the commit "all: return output dictionary from
 * AddAndActivate2":
 *
 * Add an "a{sv}" output argument to the "AddAndActivate2" D-Bus API.
 * "AddAndActivate2" replaces "AddAndActivate" with more options. It also has a
 * dictionary input argument for forward compatibility, so that we hopefully
 * won't need an "AddAndActivate3"; however, it lacked a similar output
 * dictionary. Add one for future extensibility. This is really a workaround
 * for a shortcoming of D-Bus, which provides strong typing and type
 * information about its API but does not allow extending an existing API in a
 * backward-compatible manner. So we either resort to Method(), Method2(),
 * Method3() variants, or to a catch-all variant with generic "a{sv}"
 * input/output arguments.
 *
 * In libnm, rename "nm_client_add_and_activate_connection_options()" to
 * "nm_client_add_and_activate_connection2()". libnm API should have an
 * obvious correspondence with the D-Bus API; stated differently, if
 * "AddAndActivateOptions" were the better name, then the D-Bus API should be
 * renamed instead. Whichever name is preferred, the D-Bus and libnm names
 * should correspond, and here "AddAndActivate2" is the better name, hence the
 * libnm API is renamed to match.
 *
 * Also, unless necessary, libnm still calls "AddAndActivate" instead of
 * "AddAndActivate2". Our backward-compatibility policy only requires a server
 * at least as new as libnm itself, so libnm could theoretically assume that
 * the server supports "AddAndActivate2" and always use the more powerful
 * variant. However, there is no need to break compatibility intentionally for
 * little gain: it is easy to keep handling the old server API by continuing
 * to use "AddAndActivate" for nm_client_add_and_activate_connection(). Note
 * that during a package update the running NetworkManager instance is not
 * restarted, so nmcli/libnm can easily be newer than the server; let's try a
 * bit harder not to break that.
 *
 * Changes as discussed in [1].
 * [1] https://gitlab.freedesktop.org/NetworkManager/NetworkManager/merge_requests/37#note_79876
 */
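
/* Illustrative sketch (not part of libnm): given the compatibility note
 * above, a caller that only sometimes needs options can stay compatible with
 * older daemons by preferring the plain variant when no options are given.
 * The helper name is hypothetical. Note that the GAsyncReadyCallback must
 * later call the _finish() variant matching the branch that was taken.
 */
static void
add_and_activate_compat(NMClient           *client,
                        NMConnection       *partial,
                        NMDevice           *device,
                        GVariant           *options,
                        GAsyncReadyCallback callback,
                        gpointer            user_data)
{
    if (options) {
        /* Requires the "AddAndActivate2" D-Bus method (NetworkManager >= 1.16). */
        nm_client_add_and_activate_connection2(client, partial, device, NULL,
                                               options, NULL, callback, user_data);
    } else {
        /* Uses the original "AddAndActivate" D-Bus method, which older
         * daemons also understand. */
        nm_client_add_and_activate_connection_async(client, partial, device, NULL,
                                                    NULL, callback, user_data);
    }
}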
/**
 * nm_client_add_and_activate_connection2:
 * @client: a #NMClient
 * @partial: (nullable): an #NMConnection to add; the connection may be
 *   partially filled (or even %NULL) and will be completed by NetworkManager
 *   using the given @device and @specific_object before being added
 * @device: (nullable): the #NMDevice
 * @specific_object: (nullable): the object path of a connection-type-specific
 *   object this activation should use. This parameter is currently ignored for
 *   wired and mobile broadband connections, and the value of %NULL should be
 *   used (i.e., no specific object). For Wi-Fi or WiMAX connections, pass the
 *   object path of a #NMAccessPoint or #NMWimaxNsp owned by @device, which you
 *   can get using nm_object_get_path(), and which will be used to complete the
 *   details of the newly added connection.
 * @options: a #GVariant containing a dictionary with options, or %NULL
 * @cancellable: a #GCancellable, or %NULL
 * @callback: callback to be called when the activation has started
 * @user_data: caller-specific data passed to @callback
 *
 * Adds a new connection using the given details (if any) as a template,
 * automatically filling in missing settings with the capabilities of the given
 * device and specific object. The new connection is then asynchronously
 * activated as with nm_client_activate_connection_async(). Cannot be used for
 * VPN connections at this time.
 *
 * Note that the callback is invoked when NetworkManager has started activating
 * the new connection, not when it finishes. You can use the returned
 * #NMActiveConnection object (in particular, #NMActiveConnection:state) to
 * track the activation to its completion.
 *
 * This is identical to nm_client_add_and_activate_connection_async() but takes
 * a further @options parameter. Currently, the following options are supported
 * by the daemon:
 *  * "persist": a string describing how the connection should be stored.
 *    The default is "disk", but it can be modified to "memory" (kept until
 *    the daemon quits) or "volatile" (deleted on disconnect).
 *  * "bind-activation": binds the lifetime of the activation. The default is
 *    "none", meaning an explicit disconnect is needed. The value "dbus-client"
 *    means the connection will automatically be deactivated when the calling
 *    D-Bus client disappears from the system bus.
 *
 * Since: 1.16
 **/
void
nm_client_add_and_activate_connection2(NMClient            *client,
                                       NMConnection        *partial,
                                       NMDevice            *device,
                                       const char          *specific_object,
                                       GVariant            *options,
                                       GCancellable        *cancellable,
                                       GAsyncReadyCallback  callback,
                                       gpointer             user_data)
{
    _add_and_activate_connection(client,
                                 TRUE,
                                 partial,
                                 device,
                                 specific_object,
                                 options,
                                 cancellable,
                                 callback,
                                 user_data);
}
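
/* Illustrative sketch (not part of libnm): building the "a{sv}" options
 * dictionary described above with the documented "persist" and
 * "bind-activation" keys. The helper and callback names are hypothetical;
 * for the callback side, see nm_client_add_and_activate_connection2_finish()
 * below.
 */
static void
add_volatile_connection(NMClient *client, NMDevice *device, GAsyncReadyCallback callback)
{
    GVariantBuilder opts;

    g_variant_builder_init(&opts, G_VARIANT_TYPE_VARDICT);
    g_variant_builder_add(&opts, "{sv}", "persist", g_variant_new_string("volatile"));
    g_variant_builder_add(&opts, "{sv}", "bind-activation", g_variant_new_string("dbus-client"));

    nm_client_add_and_activate_connection2(client,
                                           NULL, /* partial: let NetworkManager complete it */
                                           device,
                                           NULL, /* specific_object */
                                           g_variant_builder_end(&opts),
                                           NULL, /* cancellable */
                                           callback,
                                           NULL);
}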
/**
 * nm_client_add_and_activate_connection2_finish:
 * @client: an #NMClient
 * @result: the result passed to the #GAsyncReadyCallback
 * @error: location for a #GError, or %NULL
 * @out_result: (out) (optional) (nullable) (transfer full): the output result
 *   of type "a{sv}" returned by D-Bus' AddAndActivate2 call. Currently, no
 *   output is implemented yet.
 *
 * Gets the result of a call to nm_client_add_and_activate_connection2().
 *
 * You can call nm_active_connection_get_connection() on the returned
 * #NMActiveConnection to find the path of the created #NMConnection.
 *
 * Returns: (transfer full): the new #NMActiveConnection on success, %NULL on
 *   failure, in which case @error will be set.
 *
 * Since: 1.16
 **/
NMActiveConnection *
nm_client_add_and_activate_connection2_finish(NMClient      *client,
                                              GAsyncResult  *result,
                                              GVariant     **out_result,
                                              GError       **error)
{
    return NM_ACTIVE_CONNECTION(_request_wait_finish(client,
                                                     result,
                                                     nm_client_add_and_activate_connection2,
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
                                 out_result,
                                 error));
}
/*****************************************************************************/

/**
 * nm_client_deactivate_connection:
 * @client: a #NMClient
 * @active: the #NMActiveConnection to deactivate
 * @cancellable: a #GCancellable, or %NULL
 * @error: location for a #GError, or %NULL
 *
 * Deactivates an active #NMActiveConnection.
 *
 * Returns: success or failure
libnm: deprecate synchronous/blocking API in libnm
Note that D-Bus is fundamentally asynchronous. Doing blocking calls
on top of D-Bus is odd, especially for libnm's NMClient. That is because
NMClient essentially is a client-side cache of the objects from the D-Bus
interface. This cache should be filled exclusively by (asynchronous) D-Bus
events (PropertiesChanged). So, making a blocking D-Bus call means to wait
for a response and return it, while queuing all messages that are received
in the meantime.
Basically, there are three ways in which a synchronous API on NMClient could behave:
1) the call just calls g_dbus_connection_call_sync(). This means
that libnm sends a D-Bus request via GDBusConnection, and blockingly
waits for the response. All D-Bus messages that get received in the
meantime are queued in the GMainContext that belongs to NMClient.
That means, none of these D-Bus events are processed until we
iterate the GMainContext after the call returns. The effect is,
that NMClient (and all cached objects in there) are unaffected by
the D-Bus request.
Most of the synchronous API calls in libnm are of this kind.
The problem is that the strict ordering of D-Bus events gets
violated.
For some API this is not an immediate problem. Take for example
nm_device_wifi_request_scan(): the call merely tells NetworkManager,
blockingly, to start scanning. Since NetworkManager's D-Bus API does
not directly expose any state that tells whether we are currently
scanning, this out-of-order processing of the D-Bus request is only
a small issue.
The problem is more obvious for nm_client_networking_set_enabled().
After calling it, NM_CLIENT_NETWORKING_ENABLED is still unaffected
and unchanged, because the PropertiesChanged signal from D-Bus
is not yet processed.
This means that while you make such a blocking call, NMClient's state
does not change. But usually you perform the synchronous call precisely
to change some state. In this form the blocking call is not useful,
because NMClient only changes its state after iterating the GMainContext,
not when the blocking call returns (see the sketch after this list).
2) like 1), but after making the blocking g_dbus_connection_call_sync(),
update the NMClient cache artificially. This is what
nm_manager_check_connectivity() does, to "fix" bgo#784629.
This also has the problem of out-of-order events, but it kinda
solves the problem of not changing the state during the blocking
call. But it does so by hacking the state of the cache. I think
this is really wrong because the state should only be updated from
the ordered stream of D-Bus messages (PropertiesChanged signal and
similar). When libnm decides to modify the state, there may be already
D-Bus messages queued that affect this very state.
3) instead of calling g_dbus_connection_call_sync(), use the
asynchronous g_dbus_connection_call(). If we used a separate
GMainContext for all D-Bus related calls, we could ensure that
while we block for the response, we iterate that internal main context.
This might be nice, because all events are processed in order and
after the blocking call returns, the NMClient state is up to date.
There are problems, however: the current blocking API does not do this,
so it would be a significant change in behavior. Also, it might be
unexpected to the user that during the blocking call the entire
content of NMClient's cache might change and all pointers to the
cache might be invalidated. Also, of course NMClient would invoke
signals for all the changes that happen.
Another problem is that this would be more effort to implement
and it involves a small performance overhead for all D-Bus related
calls (because we have to serialize all events in an internal
GMainContext first and then invoke them on the caller's context).
Also, if users want this behavior, they can implement it themselves
by running libnm in their own GMainContext. Note that libnm might
have bugs that keep this from fully working, but those should be fixed
instead of adding such synchronous API behavior.
Read also [1] for why blocking calls are wrong.
[1] https://smcv.pseudorandom.co.uk/2008/11/nonblocking/
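To make behavior 1) concrete: a minimal sketch, assuming the caller
iterates the default GMainContext; the helper toggle_networking() is
illustrative and not part of libnm.
```
/* Sketch: the deprecated blocking call returns before NMClient's
 * cache is updated; the cache only changes once the caller iterates
 * its GMainContext. */
#include <NetworkManager.h>

static void
toggle_networking(NMClient *client, gboolean enabled)
{
    GError *error = NULL;

    /* Deprecated blocking call: sends the request and waits for the
     * reply, while all other D-Bus messages are merely queued. */
    if (!nm_client_networking_set_enabled(client, enabled, &error)) {
        g_warning("request failed: %s", error->message);
        g_clear_error(&error);
        return;
    }

    /* May still report the old value: the PropertiesChanged signal
     * is queued but not yet processed. */
    g_print("right after the call: %d\n", nm_client_networking_get_enabled(client));

    /* Process queued events; only now does the cached property change
     * (once the PropertiesChanged signal has arrived). */
    while (g_main_context_iteration(NULL, FALSE))
        ;
    g_print("after iterating: %d\n", nm_client_networking_get_enabled(client));
}
```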
So, all possible behaviors for a synchronous API have severe behavioral
issues. Mark all this API as deprecated. This also serves the purpose of
identifying blocking D-Bus calls in libnm.
Note that "deprecated" here does not really mean that the API is going
to be removed. We don't break API. The user may:
- continue to use this API. It's deprecated, awkward and discouraged,
but if it works, by all means use it.
- use asynchronous API. That's the only sensible way to use D-Bus.
If libnm lacks a certain asynchronous counterpart, it should be
added.
- use GDBusConnection directly. There really isn't anything wrong
with D-Bus or GDBusConnection. This deprecated API is just a wrapper
around g_dbus_connection_call_sync(). You may call it directly
without feeling dirty.
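For example, here is a minimal sketch of such a direct call, mirroring
the deprecated wrapper below; the helper deactivate_directly() is
illustrative, while the bus name, object path and interface are
NetworkManager's well-known D-Bus names.
```
/* Sketch: call DeactivateConnection directly on the system bus,
 * without going through libnm's deprecated synchronous wrapper. */
#include <gio/gio.h>

static gboolean
deactivate_directly(GDBusConnection *bus, const char *active_path, GError **error)
{
    GVariant *ret;

    ret = g_dbus_connection_call_sync(bus,
                                      "org.freedesktop.NetworkManager",
                                      "/org/freedesktop/NetworkManager",
                                      "org.freedesktop.NetworkManager",
                                      "DeactivateConnection",
                                      g_variant_new("(o)", active_path),
                                      G_VARIANT_TYPE("()"),
                                      G_DBUS_CALL_FLAGS_NONE,
                                      -1, /* default timeout */
                                      NULL,
                                      error);
    if (!ret)
        return FALSE;
    g_variant_unref(ret);
    return TRUE;
}
```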
---
The only other remaining API is the synchronous GInitable call for
NMClient. That is an entirely separate beast and not particularly
wrong (from an API point of view).
Note that the synchronous API in NMSecretAgentOld, NMVpnPluginOld and
NMVpnServicePlugin is not deprecated here. These types are not part
of the D-Bus cache, and while they have similar issues, the problem is
less severe because they hold less state.
 *
 * Deprecated: 1.22: Use nm_client_deactivate_connection_async() or GDBusConnection.
 **/
gboolean
nm_client_deactivate_connection(NMClient           *client,
                                NMActiveConnection *active,
                                GCancellable       *cancellable,
                                GError            **error)
{
    const char *active_path;

    g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
    g_return_val_if_fail(NM_IS_ACTIVE_CONNECTION(active), FALSE);

    active_path = nm_object_get_path(NM_OBJECT(active));
    g_return_val_if_fail(active_path, FALSE);

    return _nm_client_dbus_call_sync_void(client,
                                          cancellable,
                                          NM_DBUS_PATH,
                                          NM_DBUS_INTERFACE,
                                          "DeactivateConnection",
                                          g_variant_new("(o)", active_path),
                                          G_DBUS_CALL_FLAGS_NONE,
                                          NM_DBUS_DEFAULT_TIMEOUT_MSEC,
                                          TRUE,
                                          error);
}
/**
 * nm_client_deactivate_connection_async:
 * @client: a #NMClient
 * @active: the #NMActiveConnection to deactivate
 * @cancellable: a #GCancellable, or %NULL
 * @callback: callback to be called when the deactivation has completed
 * @user_data: caller-specific data passed to @callback
 *
 * Asynchronously deactivates an active #NMActiveConnection.
 **/
void
nm_client_deactivate_connection_async(NMClient           *client,
                                      NMActiveConnection *active,
                                      GCancellable       *cancellable,
                                      GAsyncReadyCallback callback,
                                      gpointer            user_data)
{
    const char *active_path;

    g_return_if_fail(NM_IS_CLIENT(client));
    g_return_if_fail(NM_IS_ACTIVE_CONNECTION(active));

    active_path = nm_object_get_path(NM_OBJECT(active));
    g_return_if_fail(active_path);

    _nm_client_dbus_call(client,
                         client,
                         nm_client_deactivate_connection_async,
                         cancellable,
                         callback,
                         user_data,
                         NM_DBUS_PATH,
                         NM_DBUS_INTERFACE,
                         "DeactivateConnection",
                         g_variant_new("(o)", active_path),
                         G_VARIANT_TYPE("()"),
                         G_DBUS_CALL_FLAGS_NONE,
                         NM_DBUS_DEFAULT_TIMEOUT_MSEC,
                         nm_dbus_connection_call_finish_void_strip_dbus_error_cb);
}
/**
 * nm_client_deactivate_connection_finish:
 * @client: a #NMClient
 * @result: the result passed to the #GAsyncReadyCallback
 * @error: location for a #GError, or %NULL
 *
 * Gets the result of a call to nm_client_deactivate_connection_async().
 *
 * Returns: success or failure
 **/
gboolean
nm_client_deactivate_connection_finish(NMClient *client, GAsyncResult *result, GError **error)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
    g_return_val_if_fail(nm_g_task_is_valid(result, client, nm_client_deactivate_connection_async),
                         FALSE);

    return g_task_propagate_boolean(G_TASK(result), error);
}
|
|
|
|
|
|
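/* A minimal usage sketch (illustrative, not part of this file): the async
 * call is paired with the _finish() call inside the #GAsyncReadyCallback.
 * The callback name deactivated_cb() and the GMainLoop passed as user_data
 * are assumptions for the example.
 *
 *   static void
 *   deactivated_cb(GObject *source, GAsyncResult *result, gpointer user_data)
 *   {
 *       GMainLoop *loop  = user_data;
 *       GError    *error = NULL;
 *
 *       if (!nm_client_deactivate_connection_finish(NM_CLIENT(source), result, &error)) {
 *           g_printerr("deactivation failed: %s\n", error->message);
 *           g_clear_error(&error);
 *       }
 *       g_main_loop_quit(loop);
 *   }
 *
 *   nm_client_deactivate_connection_async(client, active, NULL, deactivated_cb, loop);
 */
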
/*****************************************************************************/
/* Connections */
/*****************************************************************************/

/**
 * nm_client_get_connections:
 * @client: the %NMClient
 *
 * Returns: (transfer none) (element-type NMRemoteConnection): an array
 * containing all connections provided by the remote settings service. The
 * returned array is owned by the #NMClient object and should not be modified.
 *
 * The connections are as received from D-Bus and might not validate according
 * to nm_connection_verify().
 **/
const GPtrArray *
nm_client_get_connections(NMClient *client)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);

    return nml_dbus_property_ao_get_objs_as_ptrarray(
        &NM_CLIENT_GET_PRIVATE(client)->settings.connections);
}

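/* A minimal usage sketch (illustrative, not part of this file): iterating
 * the returned array. The array is owned by @client and must not be
 * modified or freed by the caller.
 *
 *   const GPtrArray *connections = nm_client_get_connections(client);
 *   guint            i;
 *
 *   for (i = 0; i < connections->len; i++) {
 *       NMConnection *connection = NM_CONNECTION(connections->pdata[i]);
 *
 *       g_print("%s\n", nm_connection_get_id(connection));
 *   }
 */
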
/**
 * nm_client_get_connection_by_id:
 * @client: the %NMClient
 * @id: the id of the remote connection
 *
 * Returns the first %NMRemoteConnection that matches the given @id.
 *
 * Returns: (transfer none): the remote connection object on success, or %NULL if no
 * matching object was found.
 *
 * The connection is as received from D-Bus and might not validate according
 * to nm_connection_verify().
 **/
NMRemoteConnection *
nm_client_get_connection_by_id(NMClient *client, const char *id)
{
    const GPtrArray *arr;
    guint            i;

    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);
    g_return_val_if_fail(id, NULL);
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    arr = nm_client_get_connections(client);
    for (i = 0; i < arr->len; i++) {
        NMRemoteConnection *c = NM_REMOTE_CONNECTION(arr->pdata[i]);

        if (nm_streq0(id, nm_connection_get_id(NM_CONNECTION(c))))
            return c;
    }

    return NULL;
}

/**
 * nm_client_get_connection_by_path:
 * @client: the %NMClient
 * @path: the D-Bus object path of the remote connection
 *
 * Returns the %NMRemoteConnection representing the connection at @path.
 *
 * Returns: (transfer none): the remote connection object on success, or %NULL if the object was
 *   not known
 *
 * The connection is as received from D-Bus and might not validate according
 * to nm_connection_verify().
 **/
NMRemoteConnection *
nm_client_get_connection_by_path(NMClient *client, const char *path)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);
    g_return_val_if_fail(path != NULL, NULL);

    return _dbobjs_get_nmobj_unpack_visible(client, path, NM_TYPE_REMOTE_CONNECTION);
}
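
/* A minimal usage sketch, not part of the original source; the object
 * path below is illustrative only. */
#if 0
static void
example_lookup_by_path(NMClient *client)
{
    NMRemoteConnection *rc;

    rc = nm_client_get_connection_by_path(client,
                                          "/org/freedesktop/NetworkManager/Settings/1");
    if (rc)
        g_print("found connection: %s\n", nm_connection_get_id(NM_CONNECTION(rc)));
}
#endif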

/**
 * nm_client_get_connection_by_uuid:
 * @client: the %NMClient
 * @uuid: the UUID of the remote connection
 *
 * Returns the %NMRemoteConnection identified by @uuid.
 *
 * Returns: (transfer none): the remote connection object on success, or %NULL if the object was
 *   not known
 *
 * The connection is as received from D-Bus and might not validate according
 * to nm_connection_verify().
 **/
NMRemoteConnection *
nm_client_get_connection_by_uuid(NMClient *client, const char *uuid)
{
    const GPtrArray *arr;
    guint            i;

    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also, previously we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them, because a mismatch won't compile. (See the macro sketch after this list.)
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type; it didn't know the expected D-Bus type. (See the type-check sketch after this list.)
- Previously, GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify::devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify::devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and from the "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
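As a minimal sketch of the GMainContext rule above (illustrative only, not part of the patch; worker() is a hypothetical function): the client sticks to the context that is thread-default at construction, and its callbacks only fire while that context is iterated.
```
#include <NetworkManager.h>

static void
worker(void)
{
    GMainContext *ctx   = g_main_context_new();
    GError       *error = NULL;
    NMClient     *client;

    g_main_context_push_thread_default(ctx);

    /* Synchronous initialization; internally NMClient still operates
     * asynchronously on the context that is thread-default here. */
    client = nm_client_new(NULL, &error);
    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_clear_error(&error);
    } else {
        /* Callbacks are delivered only while we iterate *our* context;
         * libnm never enqueues sources on g_main_context_default(). */
        while (g_main_context_iteration(ctx, FALSE))
            ;
        g_object_unref(client);
    }

    g_main_context_pop_thread_default(ctx);
    g_main_context_unref(ctx);
}
```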
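The batching idea behind the three processing steps can be illustrated with plain GObject primitives. This is a simplified sketch, not the actual libnm code (apply_changes() is a hypothetical helper): notifications stay queued until the whole state is consistent.
```
#include <glib-object.h>

/* Freeze notifications on all affected objects, apply every change,
 * and only then thaw, so that "notify" signals fire when the cache
 * is already fully updated. */
static void
apply_changes(GPtrArray *objects /* (element-type GObject) */)
{
    guint i;

    for (i = 0; i < objects->len; i++)
        g_object_freeze_notify(objects->pdata[i]);

    /* ... update properties on all objects here; no signals are
     * emitted while the notify queues are frozen ... */

    for (i = 0; i < objects->len; i++)
        g_object_thaw_notify(objects->pdata[i]);
}
```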
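Checking a received value against the expected D-Bus type can be as simple as this sketch (check_dbus_type() is a hypothetical helper, not the libnm implementation):
```
#include <glib.h>

/* Reject a property value whose D-Bus type does not match the expected
 * signature before demarshalling it into the GObject property. */
static gboolean
check_dbus_type(GVariant *value, const char *expected_signature)
{
    return g_variant_is_of_type(value, G_VARIANT_TYPE(expected_signature));
}
```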
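And a compile-time check in an initializer macro can be built from a sizeof() trick. The following is a hypothetical macro loosely modeled on the idea behind NML_DBUS_META_PROPERTY_INIT_U(); the MetaProperty struct and the names are placeholders, not the real libnm types.
```
#include <glib.h>

typedef struct {
    const char *dbus_type; /* expected D-Bus signature, e.g. "u" */
    gsize       offset;    /* offset of the C field in the owning struct */
} MetaProperty;

/* The char[] below gets size -1, and thus fails to compile, whenever
 * the field does not have the size of a guint32. */
#define META_PROPERTY_INIT_U(structtype, field)                            \
    {                                                                      \
        .dbus_type = "u",                                                  \
        .offset    = G_STRUCT_OFFSET(structtype, field)                    \
                     + 0 * sizeof(char[sizeof(((structtype *) 0)->field)   \
                                           == sizeof(guint32)              \
                                       ? 1                                 \
                                       : -1]),                             \
    }
```
A declaration like META_PROPERTY_INIT_U(MyObject, version) then only compiles if the "version" field is 32 bits wide.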
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long a plain `nmcli` invocation takes on such a system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    g_return_val_if_fail(uuid, NULL);

    arr = nm_client_get_connections(client);

    /* Linear scan over the cached connections for a matching UUID. */
    for (i = 0; i < arr->len; i++) {
        NMRemoteConnection *c = NM_REMOTE_CONNECTION(arr->pdata[i]);
        if (nm_streq0(uuid, nm_connection_get_uuid(NM_CONNECTION(c))))
            return c;
    }
    return NULL;
}
/*****************************************************************************/

/* Completion handler for the D-Bus AddConnection family of calls. The
 * "with_extra_arg" flag tells whether the reply carries an additional
 * result dictionary (as with AddConnection2) besides the object path. */
static void
_add_connection_cb(GObject      *source,
                   GAsyncResult *result,
                   gboolean      with_extra_arg,
                   gpointer      user_data)
{
    gs_unref_variant GVariant *ret      = NULL;
    gs_unref_object GTask     *task     = user_data;
    gs_unref_variant GVariant *v_result = NULL;
    const char                *v_path;
    GError                    *error    = NULL;

    /* Finish the async D-Bus call; on failure, propagate the error
     * (unless the operation was merely cancelled). */
    ret = g_dbus_connection_call_finish(G_DBUS_CONNECTION(source), result, &error);
    if (!ret) {
        if (!nm_utils_error_is_cancelled(error))
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with a large number of D-Bus
objects. The patch tries hard to make the implementation scale well. Of
course, when we cache N objects that hold references to each other,
we are still fundamentally O(N*M) in runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each NMClient is only accessed through the caller's
GMainContext. This follows glib's style and effectively allows using NMClient
in a multi-threaded scenario (see the sketch after this list). This implies
sticking to one main context upon construction and ensuring that callbacks
are only invoked when iterating that context. Also, NMClient itself shall never
iterate the caller's context. This also means libnm must never use g_idle_add()
or g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong; signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receives a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should all be for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Previously we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types match. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden by NMClient's
nm_client_get_connections() and by the "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
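As a concrete illustration of the GMainContext contract described above, here
is a minimal caller-side sketch (not part of the patch; it only uses public
GLib API and nm_client_new(), and the function name is made up) that drives an
NMClient from a dedicated worker thread with its own context:
```
/* Sketch: give the worker thread its own GMainContext, make it the
 * thread-default context before constructing NMClient, and iterate it so
 * that all NMClient signals and callbacks are dispatched on this thread. */
#include <NetworkManager.h>

static gpointer
nmclient_worker(gpointer user_data)
{
    GMainContext *context = g_main_context_new();
    GMainLoop    *loop;
    NMClient     *client;
    GError       *error = NULL;

    g_main_context_push_thread_default(context);

    /* NMClient sticks to the context that is thread-default here... */
    client = nm_client_new(NULL, &error);
    if (!client) {
        g_printerr("cannot create NMClient: %s\n", error->message);
        g_clear_error(&error);
    } else {
        /* ...and only invokes callbacks while this context is iterated.
         * The loop runs until some callback calls g_main_loop_quit(). */
        loop = g_main_loop_new(context, FALSE);
        g_main_loop_run(loop);
        g_main_loop_unref(loop);
        g_object_unref(client);
    }

    g_main_context_pop_thread_default(context);
    g_main_context_unref(context);
    return NULL;
}
```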
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. (Anyway, the point
is to show the relative sizes, so it doesn't matter.)
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
g_dbus_error_strip_remote_error(error);
|
2019-10-07 09:48:09 +02:00
|
|
|
g_task_return_error(task, error);
|
|
|
|
|
return;
|
|
|
|
|
}
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-07 09:48:09 +02:00
|
|
|
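/* AddConnection2() replies with "(oa{sv})" (object path plus a result dict),
 * while the old AddConnection()/AddConnectionUnsaved() reply with plain "(o)". */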
if (with_extra_arg) {
|
|
|
|
|
g_variant_get(ret, "(&o@a{sv})", &v_path, &v_result);
|
|
|
|
|
} else {
|
|
|
|
|
g_variant_get(ret, "(&o)", &v_path);
|
|
|
|
|
}
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
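/* Completion is deferred: wait until the added connection shows up in the
 * client's object cache, then return the NMRemoteConnection via the task. */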
_request_wait_start(g_steal_pointer(&task),
|
|
|
|
|
"AddConnection",
|
|
|
|
|
NM_TYPE_REMOTE_CONNECTION,
|
|
|
|
|
v_path,
|
|
|
|
|
g_steal_pointer(&v_result));
|
core,libnm: add AddConnection2() D-Bus API to block autoconnect from the start
It should be possible to add a profile with autoconnect blocked from the
start. Update2() has a %NM_SETTINGS_UPDATE2_FLAG_BLOCK_AUTOCONNECT flag to
block autoconnect, and so we need something similar when adding a connection.
As the existing AddConnection() and AddConnectionUnsaved() API is not
extensible, add AddConnection2() that has flags and room for additional
arguments.
Then add and implement the new flag %NM_SETTINGS_ADD_CONNECTION2_FLAG_BLOCK_AUTOCONNECT
for AddConnection2().
Note that libnm's nm_client_add_connection2() API can completely replace
the existing nm_client_add_connection_async() call. In particular, it
will automatically prefer to call the D-Bus methods AddConnection() and
AddConnectionUnsaved(), in order to work with server versions older than
1.20. The purpose of this is that when upgrading the package, the
running NetworkManager might still be older than the installed libnm.
Since nm_client_add_connection2_finish() also has a result
output, the caller needs to decide whether they care about that result.
Hence it has an argument ignore_out_result, which allows falling back to
the old API. One might argue that a caller who doesn't care about the
output result while still wanting to be backward compatible should
itself choose to call nm_client_add_connection_async() or
nm_client_add_connection2(). But it is more convenient if the
new function can fully replace the old one, so that the caller does not
need to switch which start/finish method to call.
https://bugzilla.redhat.com/show_bug.cgi?id=1677068
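For illustration, a minimal caller-side sketch (assuming an already initialized
NMClient *client and a valid NMConnection *connection; the helper names are
made up for this example) that adds a profile to disk with autoconnect blocked
from the start:
```
static void
add_done_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GVariant           *out_result = NULL;
    GError             *error      = NULL;
    NMRemoteConnection *remote;

    remote = nm_client_add_connection2_finish(NM_CLIENT(source), result, &out_result, &error);
    if (!remote) {
        g_printerr("AddConnection2 failed: %s\n", error->message);
        g_clear_error(&error);
        return;
    }
    /* out_result carries the extra "a{sv}" output of AddConnection2(), if any. */
    g_clear_pointer(&out_result, g_variant_unref);
    g_object_unref(remote);
}

static void
add_profile_blocked(NMClient *client, NMConnection *connection)
{
    nm_client_add_connection2(client,
                              nm_connection_to_dbus(connection, NM_CONNECTION_SERIALIZE_ALL),
                              NM_SETTINGS_ADD_CONNECTION2_FLAG_TO_DISK
                                  | NM_SETTINGS_ADD_CONNECTION2_FLAG_BLOCK_AUTOCONNECT,
                              NULL,  /* no extra arguments */
                              FALSE, /* do not ignore the output result */
                              NULL,  /* cancellable */
                              add_done_cb,
                              NULL);
}
```
Because the flags here are not plain TO_DISK, libnm cannot fall back to the old
AddConnection() method and will call AddConnection2() on D-Bus.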
2019-07-09 15:22:01 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
2019-10-07 09:48:09 +02:00
|
|
|
_add_connection_cb_without_extra_result(GObject *object, GAsyncResult *result, gpointer user_data)
|
2019-07-09 15:22:01 +02:00
|
|
|
{
|
2019-10-07 09:48:09 +02:00
|
|
|
_add_connection_cb(object, result, FALSE, user_data);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
_add_connection_cb_with_extra_result(GObject *object, GAsyncResult *result, gpointer user_data)
|
|
|
|
|
{
|
|
|
|
|
_add_connection_cb(object, result, TRUE, user_data);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
2021-11-09 13:28:54 +01:00
|
|
|
_add_connection_call(NMClient *self,
|
2019-10-07 09:48:09 +02:00
|
|
|
gpointer source_tag,
|
|
|
|
|
gboolean ignore_out_result,
|
2021-11-09 13:28:54 +01:00
|
|
|
GVariant *settings,
|
2019-10-07 09:48:09 +02:00
|
|
|
NMSettingsAddConnection2Flags flags,
|
2021-11-09 13:28:54 +01:00
|
|
|
GVariant *args,
|
|
|
|
|
GCancellable *cancellable,
|
2019-10-07 09:48:09 +02:00
|
|
|
GAsyncReadyCallback callback,
|
|
|
|
|
gpointer user_data)
|
|
|
|
|
{
|
|
|
|
|
g_return_if_fail(NM_IS_CLIENT(self));
|
|
|
|
|
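/* Connection settings use the nested D-Bus form "a{sa{sv}}" (setting name ->
 * property name -> value); extra AddConnection2() arguments are a flat "a{sv}". */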
g_return_if_fail(!settings || g_variant_is_of_type(settings, G_VARIANT_TYPE("a{sa{sv}}")));
|
|
|
|
|
g_return_if_fail(!args || g_variant_is_of_type(args, G_VARIANT_TYPE("a{sv}")));
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
NML_NMCLIENT_LOG_D(self, "AddConnection() started...");
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-07 09:48:09 +02:00
|
|
|
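/* A NULL settings argument falls back to an empty "a{sa{sv}}" dict (a shared,
 * immutable singleton variant). */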
if (!settings)
|
2021-04-14 21:12:02 +02:00
|
|
|
settings = nm_g_variant_singleton_aLsaLsvII();
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-07 09:48:09 +02:00
|
|
|
/* Although AddConnection2() is also capable of handling the AddConnection()
|
|
|
|
|
* and AddConnectionUnsaved() variants, we prefer the old D-Bus methods when
|
|
|
|
|
* they are sufficient. The reason is that libnm should avoid hard dependencies
|
|
|
|
|
* on 1.20 API whenever possible. */
|
|
|
|
|
if (ignore_out_result && flags == NM_SETTINGS_ADD_CONNECTION2_FLAG_TO_DISK) {
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden by NMClient's
nm_client_get_connections() and by the "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved (see the
sketch after this list).
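As an illustration of the visibility filtering described in the last point, here is a minimal sketch (assuming an already-initialized NMClient; the helper name is hypothetical):
```c
/* Minimal sketch: nm_client_get_connections() returns only the profiles that
 * NMClient exposes; profiles hidden via "connection.permissions" are filtered
 * out, even though they may still be reachable through
 * nm_active_connection_get_connection(). */
static void
print_visible_profiles(NMClient *client)
{
    const GPtrArray *connections = nm_client_get_connections(client);
    guint            i;

    for (i = 0; i < connections->len; i++) {
        NMConnection *connection = NM_CONNECTION(connections->pdata[i]);

        g_print("%s (%s)\n", nm_connection_get_id(connection), nm_connection_get_uuid(connection));
    }
}
```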
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long a plain `nmcli` invocation takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
        _nm_client_dbus_call(self,
                             self,
                             source_tag,
                             cancellable,
                             callback,
                             user_data,
                             NM_DBUS_PATH_SETTINGS,
                             NM_DBUS_INTERFACE_SETTINGS,
                             "AddConnection",
                             g_variant_new("(@a{sa{sv}})", settings),
                             G_VARIANT_TYPE("(o)"),
                             G_DBUS_CALL_FLAGS_NONE,
                             NM_DBUS_DEFAULT_TIMEOUT_MSEC,
                             _add_connection_cb_without_extra_result);
    } else if (ignore_out_result && flags == NM_SETTINGS_ADD_CONNECTION2_FLAG_IN_MEMORY) {
        _nm_client_dbus_call(self,
                             self,
                             source_tag,
                             cancellable,
                             callback,
                             user_data,
                             NM_DBUS_PATH_SETTINGS,
                             NM_DBUS_INTERFACE_SETTINGS,
                             "AddConnectionUnsaved",
                             g_variant_new("(@a{sa{sv}})", settings),
                             G_VARIANT_TYPE("(o)"),
                             G_DBUS_CALL_FLAGS_NONE,
                             NM_DBUS_DEFAULT_TIMEOUT_MSEC,
                             _add_connection_cb_without_extra_result);
core,libnm: add AddConnection2() D-Bus API to block autoconnect from the start
It should be possible to add a profile with autoconnect blocked from the
start. Update2() has a %NM_SETTINGS_UPDATE2_FLAG_BLOCK_AUTOCONNECT flag to
block autoconnect, so we need something similar when adding a connection.
As the existing AddConnection() and AddConnectionUnsaved() API is not
extensible, add AddConnection2(), which has flags and room for additional
arguments.
Then add and implement the new flag %NM_SETTINGS_ADD_CONNECTION2_FLAG_BLOCK_AUTOCONNECT
for AddConnection2().
Note that libnm's nm_client_add_connection2() API can completely replace
the existing nm_client_add_connection_async() call. In particular, it
will automatically prefer to call the D-Bus methods AddConnection() and
AddConnectionUnsaved(), in order to work with server versions older than
1.20. The purpose of this is that when upgrading the package, the
running NetworkManager might still be older than the installed libnm.
Since nm_client_add_connection2_finish() also has a result output, the
caller needs to decide whether they care about that result. Hence the
function has an argument ignore_out_result, which allows falling back to
the old API. One might argue that a caller who doesn't care about the
output result but still wants backward compatibility should itself
choose to call nm_client_add_connection_async() or
nm_client_add_connection2(). But it's more convenient if the new
function can fully replace the old one, so that the caller does not
need to switch which start/finish method to call.
https://bugzilla.redhat.com/show_bug.cgi?id=1677068
    } else {
        _nm_client_dbus_call(self,
                             self,
                             source_tag,
                             cancellable,
                             callback,
                             user_data,
                             NM_DBUS_PATH_SETTINGS,
                             NM_DBUS_INTERFACE_SETTINGS,
                             "AddConnection2",
                             g_variant_new("(@a{sa{sv}}u@a{sv})",
                                           settings,
                                           (guint32) flags,
                                           args ?: nm_g_variant_singleton_aLsvI()),
                             G_VARIANT_TYPE("(oa{sv})"),
                             G_DBUS_CALL_FLAGS_NONE,
                             NM_DBUS_DEFAULT_TIMEOUT_MSEC,
                             _add_connection_cb_with_extra_result);
    }
}

/**
 * nm_client_add_connection_async:
 * @client: the %NMClient
 * @connection: the connection to add. Note that this object's settings will be
 *   added, not the object itself
 * @save_to_disk: whether to immediately save the connection to disk
 * @cancellable: a #GCancellable, or %NULL
 * @callback: (scope async) (closure user_data): callback to be called when the add operation completes
 * @user_data: caller-specific data passed to @callback
 *
 * Requests that the remote settings service add the given settings to a new
 * connection. If @save_to_disk is %TRUE, the connection is immediately written
 * to disk; otherwise it is initially only stored in memory, but may be saved
 * later by calling the connection's nm_remote_connection_commit_changes()
 * method.
 *
 * @connection is untouched by this function and only serves as a template of
 * the settings to add. The #NMRemoteConnection object that represents what
 * NetworkManager actually added is returned to @callback when the addition
 * operation is complete.
 *
 * Note that the #NMRemoteConnection returned in @callback may not contain
 * identical settings to @connection as NetworkManager may perform automatic
 * completion and/or normalization of connection properties.
 **/
void
nm_client_add_connection_async(NMClient           *client,
                               NMConnection       *connection,
                               gboolean            save_to_disk,
                               GCancellable       *cancellable,
                               GAsyncReadyCallback callback,
                               gpointer            user_data)
{
    g_return_if_fail(NM_IS_CONNECTION(connection));

    _add_connection_call(client,
                         nm_client_add_connection_async,
                         TRUE,
                         nm_connection_to_dbus(connection, NM_CONNECTION_SERIALIZE_ALL),
                         save_to_disk ? NM_SETTINGS_ADD_CONNECTION2_FLAG_TO_DISK
                                      : NM_SETTINGS_ADD_CONNECTION2_FLAG_IN_MEMORY,
                         NULL,
                         cancellable,
                         callback,
                         user_data);
}
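A minimal usage sketch for the async/finish pair above (hedged: it assumes a running GMainLoop and an NMClient created elsewhere, e.g. with nm_client_new(); the callback and helper names are hypothetical):
```c
/* Hedged sketch: add a profile asynchronously and fetch the resulting
 * NMRemoteConnection in the callback. Error handling is trimmed. */
static void
added_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError             *error = NULL;
    NMRemoteConnection *remote;

    remote = nm_client_add_connection_finish(NM_CLIENT(source), result, &error);
    if (!remote) {
        g_printerr("add failed: %s\n", error->message);
        g_error_free(error);
        return;
    }
    g_print("added: %s\n", nm_connection_get_uuid(NM_CONNECTION(remote)));
    g_object_unref(remote);
}

static void
add_profile(NMClient *client, NMConnection *template_connection)
{
    /* save_to_disk=TRUE persists the profile immediately */
    nm_client_add_connection_async(client, template_connection, TRUE, NULL, added_cb, NULL);
}
```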
/**
 * nm_client_add_connection_finish:
 * @client: an #NMClient
 * @result: the result passed to the #GAsyncReadyCallback
 * @error: location for a #GError, or %NULL
 *
 * Gets the result of a call to nm_client_add_connection_async().
 *
 * Returns: (transfer full): the new #NMRemoteConnection on success, %NULL on
 * failure, in which case @error will be set.
 **/
NMRemoteConnection *
nm_client_add_connection_finish(NMClient *client, GAsyncResult *result, GError **error)
{
    return NM_REMOTE_CONNECTION(
        _request_wait_finish(client, result, nm_client_add_connection_async, NULL, error));
}

/**
 * nm_client_add_connection2:
 * @client: the %NMClient
 * @settings: the "a{sa{sv}}" #GVariant with the content of the setting.
 * @flags: the %NMSettingsAddConnection2Flags argument.
 * @args: (nullable): the "a{sv}" #GVariant with extra argument or %NULL
 *   for no extra arguments.
 * @ignore_out_result: this function wraps AddConnection2(), which has an
 *   additional result "a{sv}" output parameter. By setting this to %TRUE,
 *   you signal that you are not interested in that output parameter.
 *   This allows the function to fall back to AddConnection() and AddConnectionUnsaved(),
 *   which is interesting if you run against an older server version that does
 *   not yet provide AddConnection2(). By setting this to %FALSE, the function
 *   under the hood always calls AddConnection2().
 * @cancellable: a #GCancellable, or %NULL
 * @callback: (scope async) (closure user_data): callback to be called when the add operation completes
 * @user_data: caller-specific data passed to @callback
 *
 * Call AddConnection2() D-Bus API asynchronously.
 *
 * Since: 1.20
 **/
void
nm_client_add_connection2(NMClient                     *client,
                          GVariant                     *settings,
                          NMSettingsAddConnection2Flags flags,
                          GVariant                     *args,
                          gboolean                      ignore_out_result,
                          GCancellable                 *cancellable,
2019-07-09 15:22:01 +02:00
|
|
|
GAsyncReadyCallback callback,
|
|
|
|
|
gpointer user_data)
|
|
|
|
|
{
|
2019-10-07 09:48:09 +02:00
|
|
|
_add_connection_call(client,
|
|
|
|
|
nm_client_add_connection2,
|
|
|
|
|
ignore_out_result,
|
|
|
|
|
settings,
|
|
|
|
|
flags,
|
|
|
|
|
args,
|
|
|
|
|
cancellable,
|
|
|
|
|
callback,
|
|
|
|
|
user_data);
|
2019-07-09 15:22:01 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* nm_client_add_connection2_finish:
|
|
|
|
|
* @client: the #NMClient
|
|
|
|
|
* @result: the #GAsyncResult
|
2023-03-02 12:18:30 +01:00
|
|
|
* @out_result: (out) (optional) (nullable) (transfer full): the output
|
2023-03-01 01:21:38 +01:00
|
|
|
* #GVariant from AddConnection2().
|
2019-07-09 15:22:01 +02:00
|
|
|
* If you care about the output result, then the "ignore_out_result"
|
|
|
|
|
* parameter of nm_client_add_connection2() must not be set to %TRUE.
|
2023-03-01 01:21:38 +01:00
|
|
|
* @error: return location for a #GError, or %NULL.
|
2019-07-09 15:22:01 +02:00
|
|
|
*
|
|
|
|
|
* Returns: (transfer full): on success, a pointer to the added
|
|
|
|
|
* #NMRemoteConnection.
|
|
|
|
|
*
|
|
|
|
|
* Since: 1.20
|
|
|
|
|
*/
|
|
|
|
|
NMRemoteConnection *
|
2021-11-09 13:28:54 +01:00
|
|
|
nm_client_add_connection2_finish(NMClient *client,
|
2019-07-09 15:22:01 +02:00
|
|
|
GAsyncResult *result,
|
2021-11-09 13:28:54 +01:00
|
|
|
GVariant **out_result,
|
|
|
|
|
GError **error)
|
2019-07-09 15:22:01 +02:00
|
|
|
{
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectManagerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with a large number of D-Bus
objects. The patch tries hard to make the implementation scale well. Of
course, when we cache N objects that have N-to-M references to each other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows using NMClient
in a multi-threaded scenario (see the sketch after this commit message). This implies sticking to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receives a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also, we previously constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types match. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). These are therefore not common assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
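The GMainContext contract described in the commit message above can be illustrated with a short, hedged sketch (not code from this file; run_client_on_own_context() is a hypothetical name): push an own context as thread-default before constructing the NMClient, then iterate that context so that all of the client's signals and callbacks fire there.
```
#include <NetworkManager.h>

static void
run_client_on_own_context(void)
{
    GMainContext *ctx   = g_main_context_new();
    GError       *error = NULL;
    NMClient     *client;

    g_main_context_push_thread_default(ctx);

    /* NMClient sticks to the thread-default context at the moment of
     * construction and never iterates the caller's context itself. */
    client = nm_client_new(NULL, &error);
    if (!client) {
        g_warning("could not create NMClient: %s", error->message);
        g_error_free(error);
    } else {
        /* Signals and async callbacks of @client fire while iterating @ctx. */
        while (g_main_context_iteration(ctx, FALSE))
            ;
        g_object_unref(client);
    }

    g_main_context_pop_thread_default(ctx);
    g_main_context_unref(ctx);
}
```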
|
|
|
return NM_REMOTE_CONNECTION(
|
|
|
|
|
_request_wait_finish(client, result, nm_client_add_connection2, out_result, error));
|
2014-09-29 10:58:16 -04:00
|
|
|
}
|
|
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
/*****************************************************************************/
|
|
|
|
|
|
2014-09-29 10:58:16 -04:00
|
|
|
/**
|
|
|
|
|
* nm_client_load_connections:
|
|
|
|
|
* @client: the #NMClient
|
2016-08-04 13:38:24 +02:00
|
|
|
* @filenames: (array zero-terminated=1): %NULL-terminated array of filenames to load
|
2014-09-29 10:58:16 -04:00
|
|
|
* @failures: (out) (transfer full): on return, a %NULL-terminated array of
|
|
|
|
|
* filenames that failed to load
|
|
|
|
|
* @cancellable: a #GCancellable, or %NULL
|
|
|
|
|
* @error: return location for #GError
|
|
|
|
|
*
|
|
|
|
|
* Requests that the remote settings service load or reload the given files,
|
|
|
|
|
* adding or updating the connections described within.
|
|
|
|
|
*
|
|
|
|
|
* The changes to the indicated files will not yet be reflected in
|
|
|
|
|
* @client's connections array when the function returns.
|
|
|
|
|
*
|
|
|
|
|
* If all of the indicated files were successfully loaded, the
|
|
|
|
|
* function will return %TRUE, and @failures will be set to %NULL. If
|
|
|
|
|
* NetworkManager tried to load the files, but some (or all) failed,
|
|
|
|
|
* then @failures will be set to a %NULL-terminated array of the
|
|
|
|
|
* filenames that failed to load.
|
|
|
|
|
*
|
2019-10-06 23:19:32 +02:00
|
|
|
* Returns: %TRUE on success.
|
|
|
|
|
*
|
|
|
|
|
* Warning: before libnm 1.22, the boolean return value was inconsistent.
|
|
|
|
|
* That is made worse because, when running against certain server versions
|
|
|
|
|
* before 1.20, the server would return wrong values for success/failure.
|
|
|
|
|
* This means, if you use this function in libnm before 1.22, you are advised
|
|
|
|
|
* to ignore the boolean return value and only look at @failures and @error.
|
|
|
|
|
* With libnm >= 1.22, the boolean return value corresponds to whether @error was
|
|
|
|
|
* set. Note that even in the success case, you might have individual @failures.
|
|
|
|
|
* With 1.22, the return value is consistent with nm_client_load_connections_finish().
|
libnm: deprecate synchronous/blocking API in libnm
Note that D-Bus is fundamentally asynchronous. Doing blocking calls
on top of D-Bus is odd, especially for libnm's NMClient. That is because
NMClient essentially is a client-side cache of the objects from the D-Bus
interface. This cache should be filled exclusively by (asynchronous) D-Bus
events (PropertiesChanged). So, making a blocking D-Bus call means waiting
for a response and returning it, while queuing all messages that are received
in the meantime.
Basically there are three ways how a synchronous API on NMClient could behave:
1) the call just calls g_dbus_connection_call_sync(). This means
that libnm sends a D-Bus request via GDBusConnection, and blockingly
waits for the response. All D-Bus messages that get received in the
meantime are queued in the GMainContext that belongs to NMClient.
That means, none of these D-Bus events are processed until we
iterate the GMainContext after the call returns. The effect is
that NMClient (and all cached objects in there) are unaffected by
the D-Bus request.
Most of the synchronous API calls in libnm are of this kind.
The problem is that the strict ordering of D-Bus events gets
violated.
For some API this is not an immediate problem. Take for example
nm_device_wifi_request_scan(). The call merely blockingly tells
NetworkManager to start scanning, but since NetworkManager's D-Bus
API does not directly expose any state that tells whether we are
currently scanning, this out-of-order processing of the D-Bus
request is a small issue.
The problem is more obvious for nm_client_networking_set_enabled().
After calling it, NM_CLIENT_NETWORKING_ENABLED is still unaffected
and unchanged, because the PropertiesChanged signal from D-Bus
is not yet processed.
This means, while you make such a blocking call, NMClient's state
does not change. But usually you perform the synchronous call
to change some state. In this form, the blocking call is not useful,
because NMClient only changes the state after iterating the GMainContext,
and not after the blocking call returns.
2) like 1), but after making the blocking g_dbus_connection_call_sync(),
update the NMClient cache artificially. This is what
nm_manager_check_connectivity() does, to "fix" bgo#784629.
This also has the problem of out-of-order events, but it kinda
solves the problem of not changing the state during the blocking
call. But it does so by hacking the state of the cache. I think
this is really wrong because the state should only be updated from
the ordered stream of D-Bus messages (PropertiesChanged signal and
similar). When libnm decides to modify the state, there may already be
D-Bus messages queued that affect this very state.
3) instead of calling g_dbus_connection_call_sync(), use the
asynchronous g_dbus_connection_call(). If we used a separate
GMainContext for all D-Bus related calls, we could ensure that
while we block for the response, we iterate that internal main context.
This might be nice, because all events are processed in order and
after the blocking call returns, the NMClient state is up to date.
There are problems, however: the current blocking API does not do this,
so it's a significant change in behavior. Also, it might be
unexpected to the user that during the blocking call the entire
content of NMClient's cache might change and all pointers to the
cache might be invalidated. Also, of course NMClient would invoke
signals for all the changes that happen.
Another problem is that this would be more effort to implement
and it involves a small performance overhead for all D-Bus related
calls (because we have to serialize all events in an internal
GMainContext first and then invoke them on the caller's context).
Also, if users want this behavior, they can implement it themselves
by running libnm in their own GMainContext. Note that libnm might
have bugs that prevent that from really working, but those should be fixed
instead of adding such synchronous API behavior.
Read also [1], for why blocking calls are wrong.
[1] https://smcv.pseudorandom.co.uk/2008/11/nonblocking/
So, all possible behaviors for synchronous API have severe behavioral
issues. Mark all this API as deprecated. Also, this serves the purpose of
identifying blocking D-Bus calls in libnm.
Note that "deprecated" here does not really mean that the API is going
to be removed. We don't break API. The user may:
- continue to use this API. It's deprecated, awkward and discouraged,
but if it works, by all means use it.
- use asynchronous API (a sketch follows below). That's the only sensible way to use D-Bus.
If libnm lacks a certain asynchronous counterpart, it should be
added.
- use GDBusConnection directly. There really isn't anything wrong
with D-Bus or GDBusConnection. This deprecated API is just a wrapper
around g_dbus_connection_call_sync(). You may call it directly
without feeling dirty (see the GDBusConnection sketch after the doc comment below).
---
The only other remaining API is the synchronous GInitable call for
NMClient. That is an entirely separate beast and not particularly
wrong (from an API point of view).
Note that synchronous API in NMSecretAgentOld, NMVpnPluginOld and
NMVpnServicePlugin is not deprecated here. These types are not part
of the D-Bus cache and while they have similar issues, it's less severe
because they have less state.
2019-09-04 13:58:43 +02:00
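A hedged sketch of the asynchronous counterpart recommended above (not code from this file; loaded_cb() and load_files() are hypothetical names). Note that in the finish call @failures precedes @result, and its boolean return corresponds to whether @error is set:
```
#include <NetworkManager.h>

static void
loaded_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    g_auto(GStrv)     failures = NULL;
    g_autoptr(GError) error    = NULL;

    if (!nm_client_load_connections_finish(NM_CLIENT(source), &failures, result, &error)) {
        g_warning("LoadConnections failed: %s", error->message);
        return;
    }
    if (failures) {
        gsize i;

        /* Even on success, individual files may have failed to load. */
        for (i = 0; failures[i]; i++)
            g_warning("failed to load \"%s\"", failures[i]);
    }
}

static void
load_files(NMClient *client, char **filenames)
{
    nm_client_load_connections_async(client, filenames, NULL, loaded_cb, NULL);
}
```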
|
|
|
*
|
2020-03-13 19:45:09 +01:00
|
|
|
* Deprecated: 1.22: Use nm_client_load_connections_async() or GDBusConnection.
|
2014-09-29 10:58:16 -04:00
|
|
|
**/
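Alternatively, as the deprecation text suggests, the request can be made over GDBusConnection directly. A hedged sketch, assuming the documented D-Bus signature of Settings.LoadConnections (in: "as" filenames; out: "b" status and "as" failures):
```
#include <gio/gio.h>

static gboolean
load_connections_plain_dbus(GDBusConnection *dbus, const char *const *filenames, GError **error)
{
    g_autoptr(GVariant) ret = NULL;

    ret = g_dbus_connection_call_sync(dbus,
                                      "org.freedesktop.NetworkManager",
                                      "/org/freedesktop/NetworkManager/Settings",
                                      "org.freedesktop.NetworkManager.Settings",
                                      "LoadConnections",
                                      g_variant_new("(^as)", filenames),
                                      G_VARIANT_TYPE("(bas)"),
                                      G_DBUS_CALL_FLAGS_NONE,
                                      -1,
                                      NULL,
                                      error);
    return ret != NULL;
}
```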
|
|
|
|
|
gboolean
|
2021-11-09 13:28:54 +01:00
|
|
|
nm_client_load_connections(NMClient *client,
|
|
|
|
|
char **filenames,
|
|
|
|
|
char ***failures,
|
2014-09-29 10:58:16 -04:00
|
|
|
GCancellable *cancellable,
|
2021-11-09 13:28:54 +01:00
|
|
|
GError **error)
|
2014-09-29 10:58:16 -04:00
|
|
|
{
|
2019-10-06 22:50:29 +02:00
|
|
|
gs_unref_variant GVariant *ret = NULL;
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2014-09-29 10:58:16 -04:00
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
|
2019-10-06 22:50:29 +02:00
|
|
|
g_return_val_if_fail(!cancellable || G_IS_CANCELLABLE(cancellable), FALSE);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
ret = _nm_client_dbus_call_sync(client,
|
2019-10-06 22:50:29 +02:00
|
|
|
cancellable,
|
|
|
|
|
NM_DBUS_PATH_SETTINGS,
|
|
|
|
|
NM_DBUS_INTERFACE_SETTINGS,
|
|
|
|
|
"LoadConnections",
|
|
|
|
|
g_variant_new("(^as)", filenames ?: NM_PTRARRAY_EMPTY(char *)),
|
|
|
|
|
G_VARIANT_TYPE("(bas)"),
|
|
|
|
|
G_DBUS_CALL_FLAGS_NONE,
|
|
|
|
|
NM_DBUS_DEFAULT_TIMEOUT_MSEC,
|
|
|
|
|
TRUE,
|
|
|
|
|
error);
|
|
|
|
|
if (!ret) {
|
|
|
|
|
*failures = NULL;
|
2014-09-29 10:58:16 -04:00
|
|
|
return FALSE;
|
2019-10-06 22:50:29 +02:00
|
|
|
}
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2021-09-13 11:00:13 +08:00
|
|
|
g_variant_get(ret, "(b^as)", NULL, failures);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-06 22:50:29 +02:00
|
|
|
return TRUE;
|
2014-09-29 10:58:16 -04:00
|
|
|
}
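For reference, a hedged sketch of calling this blocking variant, assuming the documented signature (client, filenames, failures, cancellable, error); the file path is hypothetical, and the asynchronous variant that follows is the preferred interface:
```
static void
load_files_blocking(NMClient *client)
{
    const char *files[]  = {
        "/etc/NetworkManager/system-connections/example.nmconnection", /* hypothetical */
        NULL,
    };
    char      **failures = NULL;
    GError     *error    = NULL;

    if (!nm_client_load_connections(client, (char **) files, &failures, NULL, &error)) {
        g_printerr("LoadConnections failed: %s\n", error->message);
        g_error_free(error);
        return;
    }
    /* TRUE overall; individual files may still have failed to load. */
    for (gsize i = 0; failures && failures[i]; i++)
        g_printerr("could not load: %s\n", failures[i]);
    g_strfreev(failures);
}
```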
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* nm_client_load_connections_async:
|
|
|
|
|
* @client: the %NMClient
|
2016-08-04 13:38:24 +02:00
|
|
|
* @filenames: (array zero-terminated=1): %NULL-terminated array of filenames to load
|
2014-09-29 10:58:16 -04:00
|
|
|
* @cancellable: a #GCancellable, or %NULL
|
2024-11-27 12:51:24 +01:00
|
|
|
* @callback: (scope async) (closure user_data): callback to be called when the operation completes
|
|
|
|
|
* @user_data: caller-specific data passed to @callback
|
2014-09-29 10:58:16 -04:00
|
|
|
*
|
|
|
|
|
* Requests that the remote settings service asynchronously load or reload the
|
|
|
|
|
* given files, adding or updating the connections described within.
|
|
|
|
|
*
|
|
|
|
|
* See nm_client_load_connections() for more details.
|
|
|
|
|
**/
|
|
|
|
|
void
|
2021-11-09 13:28:54 +01:00
|
|
|
nm_client_load_connections_async(NMClient *client,
|
|
|
|
|
char **filenames,
|
|
|
|
|
GCancellable *cancellable,
|
2014-09-29 10:58:16 -04:00
|
|
|
GAsyncReadyCallback callback,
|
|
|
|
|
gpointer user_data)
|
|
|
|
|
{
|
|
|
|
|
g_return_if_fail(NM_IS_CLIENT(client));
|
2019-10-06 23:16:48 +02:00
|
|
|
g_return_if_fail(!cancellable || G_IS_CANCELLABLE(cancellable));
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
_nm_client_dbus_call(client,
|
|
|
|
|
client,
|
2019-10-06 23:16:48 +02:00
|
|
|
nm_client_load_connections_async,
|
|
|
|
|
cancellable,
|
|
|
|
|
callback,
|
|
|
|
|
user_data,
|
|
|
|
|
NM_DBUS_PATH_SETTINGS,
|
|
|
|
|
NM_DBUS_INTERFACE_SETTINGS,
|
|
|
|
|
"LoadConnections",
|
|
|
|
|
g_variant_new("(^as)", filenames ?: NM_PTRARRAY_EMPTY(char *)),
|
|
|
|
|
G_VARIANT_TYPE("(bas)"),
|
|
|
|
|
G_DBUS_CALL_FLAGS_NONE,
|
|
|
|
|
NM_DBUS_DEFAULT_TIMEOUT_MSEC,
|
|
|
|
|
nm_dbus_connection_call_finish_variant_strip_dbus_error_cb);
|
2014-09-29 10:58:16 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* nm_client_load_connections_finish:
|
|
|
|
|
* @client: the %NMClient
|
2016-08-04 13:38:24 +02:00
|
|
|
* @failures: (out) (transfer full) (array zero-terminated=1): on return, a
|
|
|
|
|
* %NULL-terminated array of filenames that failed to load
|
2014-09-29 10:58:16 -04:00
|
|
|
* @result: the result passed to the #GAsyncReadyCallback
|
|
|
|
|
* @error: location for a #GError, or %NULL
|
|
|
|
|
*
|
|
|
|
|
* Gets the result of an nm_client_load_connections_async() call.
|
|
|
|
|
|
|
|
|
|
* See nm_client_load_connections() for more details.
|
|
|
|
|
*
|
2019-10-06 23:19:32 +02:00
|
|
|
* Returns: %TRUE on success.
|
|
|
|
|
* Note that even in the success case, you might have individual @failures.
|
2014-09-29 10:58:16 -04:00
|
|
|
**/
|
|
|
|
|
gboolean
|
2021-11-09 13:28:54 +01:00
|
|
|
nm_client_load_connections_finish(NMClient *client,
|
|
|
|
|
char ***failures,
|
2014-09-29 10:58:16 -04:00
|
|
|
GAsyncResult *result,
|
2021-11-09 13:28:54 +01:00
|
|
|
GError **error)
|
2014-09-29 10:58:16 -04:00
|
|
|
{
|
2019-10-06 23:16:48 +02:00
|
|
|
gs_unref_variant GVariant *ret = NULL;
|
2014-09-29 10:58:16 -04:00
|
|
|
|
|
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
|
2019-10-06 23:16:48 +02:00
|
|
|
g_return_val_if_fail(nm_g_task_is_valid(result, client, nm_client_load_connections_async),
|
|
|
|
|
FALSE);
|
2014-09-29 10:58:16 -04:00
|
|
|
|
2019-10-06 23:16:48 +02:00
|
|
|
ret = g_task_propagate_pointer(G_TASK(result), error);
|
|
|
|
|
if (!ret) {
|
|
|
|
|
*failures = NULL;
|
2014-09-29 10:58:16 -04:00
|
|
|
return FALSE;
|
2019-10-06 23:16:48 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
g_variant_get(ret, "(b^as)", NULL, failures);
|
|
|
|
|
|
2019-10-06 23:19:32 +02:00
|
|
|
return TRUE;
|
2014-09-29 10:58:16 -04:00
|
|
|
}
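Putting the asynchronous call and its finisher together, a typical usage sketch (illustrative only; the callback name and file path are hypothetical):
```
static void
load_connections_done(GObject *source, GAsyncResult *result, gpointer user_data)
{
    NMClient *client   = NM_CLIENT(source);
    char    **failures = NULL;
    GError   *error    = NULL;

    if (!nm_client_load_connections_finish(client, &failures, result, &error)) {
        g_printerr("LoadConnections failed: %s\n", error->message);
        g_error_free(error);
        return;
    }
    /* TRUE overall; individual files may still have failed to load. */
    for (gsize i = 0; failures && failures[i]; i++)
        g_printerr("could not load: %s\n", failures[i]);
    g_strfreev(failures);
}

static void
request_load(NMClient *client)
{
    const char *files[] = {
        "/etc/NetworkManager/system-connections/example.nmconnection", /* hypothetical */
        NULL,
    };

    nm_client_load_connections_async(client, (char **) files, NULL,
                                     load_connections_done, NULL);
}
```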
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* nm_client_reload_connections:
|
|
|
|
|
* @client: the #NMClient
|
|
|
|
|
* @cancellable: a #GCancellable, or %NULL
|
|
|
|
|
* @error: return location for #GError
|
|
|
|
|
*
|
|
|
|
|
* Requests that the remote settings service reload all connection
|
|
|
|
|
* files from disk, adding, updating, and removing connections until
|
|
|
|
|
* the in-memory state matches the on-disk state.
|
|
|
|
|
*
|
2023-03-02 11:59:17 +01:00
|
|
|
* Returns: %TRUE on success, %FALSE on failure
|
libnm: deprecate synchronous/blocking API in libnm
Note that D-Bus is fundamentally asynchronous. Doing blocking calls
on top of D-Bus is odd, especially for libnm's NMClient. That is because
NMClient essentially is a client-side cache of the objects from the D-Bus
interface. This cache should be filled exclusively by (asynchronous) D-Bus
events (PropertiesChanged). So, making a blocking D-Bus call means to wait
for a response and return it, while queuing all messages that are received
in the meantime.
Basically there are three ways a synchronous API on NMClient could behave:
1) the call just calls g_dbus_connection_call_sync(). This means
that libnm sends a D-Bus request via GDBusConnection, and blockingly
waits for the response. All D-Bus messages that get received in the
meantime are queued in the GMainContext that belongs to NMClient.
That means, none of these D-Bus events are processed until we
iterate the GMainContext after the call returns. The effect is,
that NMClient (and all cached objects in there) are unaffected by
the D-Bus request.
Most of the synchronous API calls in libnm are of this kind.
The problem is that the strict ordering of D-Bus events gets
violated.
For some API this is not an immediate problem. Take for example
nm_device_wifi_request_scan(). The call merely blockingly tells
NetworkManager to start scanning, but since NetworkManager's D-Bus
API does not directly expose any state that tells whether we are
currently scanning, this out-of-order processing of the D-Bus
request is a small issue.
The problem is more obvious for nm_client_networking_set_enabled().
After calling it, NM_CLIENT_NETWORKING_ENABLED is still unaffected
and unchanged, because the PropertiesChanged signal from D-Bus
is not yet processed.
This means, while you make such a blocking call, NMClient's state
does not change. But usually you perform the synchronous call
to change some state. In this form, the blocking call is not useful,
because NMClient only changes the state after iterating the GMainContext,
and not after the blocking call returns.
2) like 1), but after making the blocking g_dbus_connection_call_sync(),
update the NMClient cache artificially. This is what
nm_manager_check_connectivity() does, to "fix" bgo#784629.
This also has the problem of out-of-order events, but it kinda
solves the problem of not changing the state during the blocking
call. But it does so by hacking the state of the cache. I think
this is really wrong because the state should only be updated from
the ordered stream of D-Bus messages (PropertiesChanged signal and
similar). When libnm decides to modify the state, there may be already
D-Bus messages queued that affect this very state.
3) instead of calling g_dbus_connection_call_sync(), use the
asynchronous g_dbus_connection_call(). If we used a separate
GMainContext for all D-Bus related calls, we could ensure that
while we block for the response, we iterate that internal main context.
This might be nice, because all events are processed in order and
after the blocking call returns, the NMClient state is up to date.
There are problems, however: the current blocking API does not do this,
so it's a significant change in behavior. Also, it might be
unexpected to the user that during the blocking call the entire
content of NMClient's cache might change and all pointers to the
cache might be invalidated. Also, of course NMClient would invoke
signals for all the changes that happen.
Another problem is that this would be more effort to implement
and it involves a small performance overhead for all D-Bus related
calls (because we have to serialize all events in an internal
GMainContext first and then invoke them on the caller's context).
Also, if users want this behavior, they could implement it themselves
by running libnm in their own GMainContext. Note that libnm might
have bugs that prevent this from fully working, but those should be fixed
instead of adding such synchronous API behavior.
Read also [1] for why blocking calls are wrong.
[1] https://smcv.pseudorandom.co.uk/2008/11/nonblocking/
So, all possible behaviors for a synchronous API have severe
issues. Mark all this API as deprecated. Also, this serves the purpose of
identifying blocking D-Bus calls in libnm.
Note that "deprecated" here does not really mean that the API is going
to be removed. We don't break API. The user may:
- continue to use this API. It's deprecated, awkward and discouraged,
but if it works, by all means use it.
- use asynchronous API. That's the only sensible way to use D-Bus.
If libnm lacks a certain asynchronous counterpart, it should be
added.
- use GDBusConnection directly. There really isn't anything wrong
with D-Bus or GDBusConnection. This deprecated API is just a wrapper
around g_dbus_connection_call_sync(). You may call it directly
without feeling dirty (see the sketch after this message).
---
The only other remaining API is the synchronous GInitable call for
NMClient. That is an entirely separate beast and not particularly
wrong (from an API point of view).
Note that the synchronous API in NMSecretAgentOld, NMVpnPluginOld and
NMVpnServicePlugin is not deprecated here. These types are not part
of the D-Bus cache, and while they have similar issues, those are less
severe because these types have less state.
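To make the "use GDBusConnection directly" point concrete, a hedged sketch of invoking the same settings method without the wrapper (bus name, object path and interface are NetworkManager's well-known D-Bus names; the 25-second timeout is an arbitrary choice):
```
static gboolean
reload_connections_direct(GDBusConnection *dbus, GError **error)
{
    GVariant *ret;
    gboolean  success = FALSE;

    ret = g_dbus_connection_call_sync(dbus,
                                      "org.freedesktop.NetworkManager",
                                      "/org/freedesktop/NetworkManager/Settings",
                                      "org.freedesktop.NetworkManager.Settings",
                                      "ReloadConnections",
                                      NULL,                  /* no "in" arguments */
                                      G_VARIANT_TYPE("(b)"), /* reply: one boolean */
                                      G_DBUS_CALL_FLAGS_NONE,
                                      25000,                 /* timeout in msec */
                                      NULL,                  /* cancellable */
                                      error);
    if (!ret)
        return FALSE;
    g_variant_get(ret, "(b)", &success);
    g_variant_unref(ret);
    return success;
}
```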
2019-09-04 13:58:43 +02:00
|
|
|
*
|
2020-03-13 19:45:09 +01:00
|
|
|
* Deprecated: 1.22: Use nm_client_reload_connections_async() or GDBusConnection.
|
2014-09-29 10:58:16 -04:00
|
|
|
**/
|
|
|
|
|
gboolean
|
|
|
|
|
nm_client_reload_connections(NMClient *client, GCancellable *cancellable, GError **error)
|
|
|
|
|
{
|
2019-10-07 09:30:29 +02:00
|
|
|
gs_unref_variant GVariant *ret = NULL;
|
|
|
|
|
|
2014-09-29 10:58:16 -04:00
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
|
2019-10-07 09:30:29 +02:00
|
|
|
g_return_val_if_fail(!cancellable || G_IS_CANCELLABLE(cancellable), FALSE);
|
2014-09-29 10:58:16 -04:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
ret = _nm_client_dbus_call_sync(client,
|
2019-10-07 09:30:29 +02:00
|
|
|
cancellable,
|
|
|
|
|
NM_DBUS_PATH_SETTINGS,
|
|
|
|
|
NM_DBUS_INTERFACE_SETTINGS,
|
|
|
|
|
"ReloadConnections",
|
|
|
|
|
g_variant_new("()"),
|
|
|
|
|
G_VARIANT_TYPE("(b)"),
|
|
|
|
|
G_DBUS_CALL_FLAGS_NONE,
|
|
|
|
|
NM_DBUS_DEFAULT_TIMEOUT_MSEC,
|
|
|
|
|
TRUE,
|
|
|
|
|
error);
|
|
|
|
|
if (!ret)
|
2014-09-29 10:58:16 -04:00
|
|
|
return FALSE;
|
|
|
|
|
|
2019-10-07 09:30:29 +02:00
|
|
|
return TRUE;
|
2014-09-29 10:58:16 -04:00
|
|
|
}
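Callers pair the asynchronous variant below with nm_client_reload_connections_finish() (its finisher, not shown in this excerpt). A minimal hedged usage sketch:
```
static void
reload_done(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError *error = NULL;

    if (!nm_client_reload_connections_finish(NM_CLIENT(source), result, &error)) {
        g_printerr("ReloadConnections failed: %s\n", error->message);
        g_error_free(error);
        return;
    }
    g_print("connections reloaded\n");
}

static void
request_reload(NMClient *client)
{
    nm_client_reload_connections_async(client, NULL, reload_done, NULL);
}
```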
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* nm_client_reload_connections_async:
|
|
|
|
|
* @client: the #NMClient
|
|
|
|
|
* @cancellable: a #GCancellable, or %NULL
|
2024-11-27 12:51:24 +01:00
|
|
|
* @callback: (scope async) (closure user_data): callback to be called when the reload operation completes
|
|
|
|
|
* @user_data: caller-specific data passed to @callback
|
2014-09-29 10:58:16 -04:00
|
|
|
*
|
|
|
|
|
* Requests that the remote settings service begin reloading all connection
|
|
|
|
|
* files from disk, adding, updating, and removing connections until the
|
|
|
|
|
* in-memory state matches the on-disk state.
|
|
|
|
|
**/
|
|
|
|
|
void
|
2021-11-09 13:28:54 +01:00
|
|
|
nm_client_reload_connections_async(NMClient *client,
|
|
|
|
|
GCancellable *cancellable,
|
2014-09-29 10:58:16 -04:00
|
|
|
GAsyncReadyCallback callback,
|
|
|
|
|
gpointer user_data)
|
|
|
|
|
{
|
|
|
|
|
g_return_if_fail(NM_IS_CLIENT(client));
|
2019-10-07 09:30:29 +02:00
|
|
|
g_return_if_fail(!cancellable || G_IS_CANCELLABLE(cancellable));
|
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    _nm_client_dbus_call(client,
                         client,
                         nm_client_reload_connections_async,
                         cancellable,
                         callback,
                         user_data,
                         NM_DBUS_PATH_SETTINGS,
                         NM_DBUS_INTERFACE_SETTINGS,
                         "ReloadConnections",
                         g_variant_new("()"),
                         G_VARIANT_TYPE("(b)"),
                         G_DBUS_CALL_FLAGS_NONE,
                         NM_DBUS_DEFAULT_TIMEOUT_MSEC,
                         nm_dbus_connection_call_finish_variant_strip_dbus_error_cb);
}
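
/* Illustrative sketch (hypothetical helper, not libnm API): the strict
 * reply-type checking discussed in the commit message above amounts to
 * comparing a reply against the expected GVariant signature, here the
 * "(b)" passed to _nm_client_dbus_call() above. */
static gboolean
_example_check_reply_type(GVariant *reply, GError **error)
{
    if (!g_variant_is_of_type(reply, G_VARIANT_TYPE("(b)"))) {
        g_set_error(error,
                    G_IO_ERROR,
                    G_IO_ERROR_INVALID_ARGUMENT,
                    "unexpected D-Bus reply type \"%s\"",
                    g_variant_get_type_string(reply));
        return FALSE;
    }
    return TRUE;
}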

/**
 * nm_client_reload_connections_finish:
 * @client: the #NMClient
 * @result: the result passed to the #GAsyncReadyCallback
 * @error: return location for #GError
 *
 * Gets the result of an nm_client_reload_connections_async() call.
 *
 * Returns: %TRUE on success, %FALSE on failure
 **/
gboolean
nm_client_reload_connections_finish(NMClient *client, GAsyncResult *result, GError **error)
{
    gs_unref_variant GVariant *ret = NULL;

    g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
    g_return_val_if_fail(nm_g_task_is_valid(result, client, nm_client_reload_connections_async),
                         FALSE);

    ret = g_task_propagate_pointer(G_TASK(result), error);
    if (!ret)
        return FALSE;
    return TRUE;
}
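
/* Usage sketch (the "_example_" names are hypothetical, not part of this
 * file): the usual async/finish pairing for the function above, assuming
 * an NMClient created on this thread's default GMainContext. */
static void
_example_reload_done(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GMainLoop *loop  = user_data;
    GError    *error = NULL;

    if (!nm_client_reload_connections_finish(NM_CLIENT(source), result, &error)) {
        g_printerr("ReloadConnections failed: %s\n", error->message);
        g_clear_error(&error);
    }
    g_main_loop_quit(loop);
}

static void
_example_reload_all(NMClient *client)
{
    GMainLoop *loop = g_main_loop_new(NULL, FALSE);

    nm_client_reload_connections_async(client, NULL, _example_reload_done, loop);
    g_main_loop_run(loop); /* the callback is dispatched from this context */
    g_main_loop_unref(loop);
}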

/*****************************************************************************/

/**
 * nm_client_get_dns_mode:
 * @client: the #NMClient
 *
 * Gets the current DNS processing mode.
 *
 * Returns: the DNS processing mode, or %NULL in case the
 * value is not available.
 *
 * Since: 1.6
 **/
const char *
nm_client_get_dns_mode(NMClient *client)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);

    return NM_CLIENT_GET_PRIVATE(client)->dns_manager.mode;
}

/**
 * nm_client_get_dns_rc_manager:
 * @client: the #NMClient
 *
 * Gets the current DNS resolv.conf manager.
 *
 * Returns: the resolv.conf manager or %NULL in case the
 * value is not available.
 *
 * Since: 1.6
 **/
const char *
nm_client_get_dns_rc_manager(NMClient *client)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);

    return NM_CLIENT_GET_PRIVATE(client)->dns_manager.rc_manager;
}
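
/* Usage sketch (hypothetical helper, not part of this file): reading the
 * DNS state exposed by the two getters above. Both may return %NULL while
 * the value is not available. */
static void
_example_print_dns_state(NMClient *client)
{
    const char *mode       = nm_client_get_dns_mode(client);
    const char *rc_manager = nm_client_get_dns_rc_manager(client);

    g_print("dns mode: %s, resolv.conf manager: %s\n",
            mode ? mode : "(unknown)",
            rc_manager ? rc_manager : "(unknown)");
}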

/**
 * nm_client_get_dns_configuration:
 * @client: a #NMClient
 *
 * Gets the current DNS configuration.
 *
 * Returns: (transfer none) (element-type NMDnsEntry): a #GPtrArray
 * containing #NMDnsEntry elements or %NULL in case the value is not
 * available. The returned array is owned by the #NMClient object
 * and should not be modified.
 *
 * Since: 1.6
 **/
const GPtrArray *
nm_client_get_dns_configuration(NMClient *client)
{
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);

    return NM_CLIENT_GET_PRIVATE(client)->dns_manager.configuration;
}

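/* Illustrative usage sketch (not part of the original file): one way a
 * caller might consume the array returned above. Assumes a synchronously
 * created client from nm_client_new() and the public NMDnsEntry accessors;
 * error handling is omitted for brevity.
 *
 *     NMClient        *client  = nm_client_new(NULL, NULL);
 *     const GPtrArray *entries = nm_client_get_dns_configuration(client);
 *     guint            i, j;
 *
 *     for (i = 0; entries && i < entries->len; i++) {
 *         NMDnsEntry        *entry = g_ptr_array_index(entries, i);
 *         const char        *iface = nm_dns_entry_get_interface(entry);
 *         const char *const *ns    = nm_dns_entry_get_nameservers(entry);
 *
 *         for (j = 0; ns && ns[j]; j++)
 *             g_print("%s: %s\n", iface ? iface : "(none)", ns[j]);
 *     }
 *     g_object_unref(client);
 */
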
static NMLDBusNotifyUpdatePropFlags
_notify_update_prop_dns_manager_configuration(NMClient *self,
                                              NMLDBusObject *dbobj,
                                              const NMLDBusMetaIface *meta_iface,
                                              guint dbus_property_idx,
                                              GVariant *value)
{
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
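    /* Update hook for the DnsManager "Configuration" D-Bus property (per
     * its NMLDBusMetaIface registration): judging from the name and
     * signature, it presumably unpacks @value and refreshes
     * priv->dns_manager.configuration, which backs
     * nm_client_get_dns_configuration() above. */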
|
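    /* gs_unref_ptrarray: the arrays below are unreferenced automatically when
     * they go out of scope. */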
    gs_unref_ptrarray GPtrArray *configuration_old = NULL;
    gs_unref_ptrarray GPtrArray *configuration_new = NULL;
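
    /* self must be the GObject instance registered on this cache entry. */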
    nm_assert(G_OBJECT(self) == dbobj->nmobj);

    if (value) {
        GVariant    *entry_var_tmp;
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
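For quick comparisons it is enough to pull only the peak RSS out of such a
run; a minimal one-liner, assuming GNU time, which writes its report to
stderr:
```
PAGER= /bin/time -v nmcli 2>&1 1>/dev/null | grep 'Maximum resident set size'
```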
---
N6) Same setup as N4), but run `nmcli monitor` and check the RSS column
    in `ps aux`.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
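The RSS values above were read off `ps aux` by hand. A minimal sketch for
sampling them non-interactively (the 2-second sleep is a guess, just long
enough for NMClient to finish populating its object cache):
```
nmcli monitor >/dev/null &
MONITOR_PID=$!
sleep 2                       # give NMClient time to load the object cache
ps -o rss= -p "$MONITOR_PID"  # RSS in kilobytes
kill "$MONITOR_PID"
```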
2019-10-30 11:42:58 +01:00
|
|
|
GVariantIter iter;
|
2021-11-09 13:28:54 +01:00
|
|
|
GPtrArray *array;
|
2019-10-30 11:42:58 +01:00
|
|
|
configuration_new = g_ptr_array_new_with_free_func((GDestroyNotify) nm_dns_entry_unref);
|
2019-10-30 11:42:58 +01:00
|
|
|
g_variant_iter_init(&iter, value);
|
|
|
|
|
while (g_variant_iter_next(&iter, "@a{sv}", &entry_var_tmp)) {
|
2021-11-09 13:28:54 +01:00
|
|
|
gs_unref_variant GVariant *entry_var = entry_var_tmp;
|
2019-10-30 11:42:58 +01:00
|
|
|
nm_auto_free_variant_iter GVariantIter *iterp_nameservers = NULL;
|
|
|
|
|
nm_auto_free_variant_iter GVariantIter *iterp_domains = NULL;
|
2021-11-09 13:28:54 +01:00
|
|
|
gs_free char **nameservers = NULL;
|
|
|
|
|
gs_free char **domains = NULL;
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
gboolean vpn = FALSE;
NMDnsEntry *entry;
char *interface = NULL;
char *str;
gint32 priority = 0;
if (!g_variant_lookup(entry_var, "nameservers", "as", &iterp_nameservers)
    || !g_variant_lookup(entry_var, "priority", "i", &priority)) {
    g_warning("Ignoring invalid DNS configuration");
    continue;
}
array = g_ptr_array_new();
while (g_variant_iter_next(iterp_nameservers, "&s", &str))
    g_ptr_array_add(array, str);
g_ptr_array_add(array, NULL);
nameservers = (char **) g_ptr_array_free(array, FALSE);
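The fragments above belong to the parsing of one DNS-configuration entry.
For context, here is a self-contained sketch, under the assumption that the
entries arrive as a D-Bus value of type "aa{sv}"; the wrapper name
parse_dns_entries() and the sample payload in main() are hypothetical,
added only for illustration.
```
#include <glib.h>

/* Hypothetical wrapper; the loop body mirrors the fragments above. */
static void
parse_dns_entries(GVariant *dns_config /* D-Bus type "aa{sv}" */)
{
    GVariantIter iter;
    GVariant *entry_var;

    g_variant_iter_init(&iter, dns_config);
    while (g_variant_iter_next(&iter, "@a{sv}", &entry_var)) {
        g_autoptr(GVariantIter) iterp_nameservers = NULL;
        g_autofree char **nameservers = NULL;
        GPtrArray *array;
        gint32 priority = 0;
        char *str;
        guint i;

        /* Both keys are mandatory; skip malformed entries. */
        if (!g_variant_lookup(entry_var, "nameservers", "as", &iterp_nameservers)
            || !g_variant_lookup(entry_var, "priority", "i", &priority)) {
            g_warning("Ignoring invalid DNS configuration");
            g_variant_unref(entry_var);
            continue;
        }

        /* Collect the nameservers into a NULL-terminated strv. The "&s"
         * strings are borrowed from entry_var, so only the array is freed. */
        array = g_ptr_array_new();
        while (g_variant_iter_next(iterp_nameservers, "&s", &str))
            g_ptr_array_add(array, str);
        g_ptr_array_add(array, NULL);
        nameservers = (char **) g_ptr_array_free(array, FALSE);

        for (i = 0; nameservers[i]; i++)
            g_print("priority %d: nameserver %s\n", (int) priority, nameservers[i]);

        g_variant_unref(entry_var);
    }
}

int
main(void)
{
    GVariantBuilder b;
    const char *ns[] = { "192.0.2.1", "192.0.2.2", NULL };

    /* Build a one-entry sample payload of type "aa{sv}". */
    g_variant_builder_init(&b, G_VARIANT_TYPE("aa{sv}"));
    g_variant_builder_open(&b, G_VARIANT_TYPE("a{sv}"));
    g_variant_builder_add(&b, "{sv}", "nameservers", g_variant_new_strv(ns, -1));
    g_variant_builder_add(&b, "{sv}", "priority", g_variant_new_int32(100));
    g_variant_builder_close(&b);

    g_autoptr(GVariant) dns_config = g_variant_ref_sink(g_variant_builder_end(&b));

    parse_dns_entries(dns_config);
    return 0;
}
```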
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously, GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, they prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). They are thus not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that NetworkManager currently
has bugs in this regard, so enabling this will result in assertion
failures. Those should be fixed first.
- Note that this changes the order in which we emit "notify::devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify::devices", and finally "device-added"
signals.
This changes the behavior introduced by commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
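For a client, that order means a "device-added" handler already sees the new device in nm_client_get_devices(). A sketch of observing the emissions (assumes a running NetworkManager; an illustration, not prescribed usage):
```c
/* Sketch: watching the emission order described above.
 * Compile: gcc watch.c $(pkg-config --cflags --libs libnm) */
#include <NetworkManager.h>

static void
on_device_added(NMClient *client, NMDevice *device, gpointer user_data)
{
    /* Runs after "notify::devices": the device list is already updated. */
    g_print("device-added: %s\n", nm_device_get_iface(device));
}

static void
on_notify_devices(GObject *object, GParamSpec *pspec, gpointer user_data)
{
    g_print("notify::devices (%u devices)\n",
            nm_client_get_devices(NM_CLIENT(object))->len);
}

int
main(void)
{
    GError    *error  = NULL;
    NMClient  *client = nm_client_new(NULL, &error);
    GMainLoop *loop;

    if (!client)
        g_error("failed to create NMClient: %s", error->message);

    g_signal_connect(client, "device-added",
                     G_CALLBACK(on_device_added), NULL);
    g_signal_connect(client, "notify::devices",
                     G_CALLBACK(on_notify_devices), NULL);

    loop = g_main_loop_new(NULL, FALSE);
    g_main_loop_run(loop);
    return 0;
}
```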
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and from its "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
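A minimal sketch of the visible side of this: enumerating profiles through NMClient, which filters hidden ones as described (illustration only):
```c
/* Sketch: hidden profiles do not show up in this listing, though they may
 * still be reachable through an NMActiveConnection, as noted above.
 * Compile: gcc list.c $(pkg-config --cflags --libs libnm) */
#include <NetworkManager.h>

int
main(void)
{
    GError   *error  = NULL;
    NMClient *client = nm_client_new(NULL, &error);

    if (!client)
        g_error("failed to create NMClient: %s", error->message);

    const GPtrArray *conns = nm_client_get_connections(client);

    for (guint i = 0; i < conns->len; i++) {
        NMRemoteConnection *rc = g_ptr_array_index(conns, i);

        g_print("%s\n", nm_connection_get_id(NM_CONNECTION(rc)));
    }

    g_object_unref(client);
    return 0;
}
```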
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
---
Code context: parsing one DNS configuration entry from the D-Bus variant.
```c
if (g_variant_lookup(entry_var, "domains", "as", &iterp_domains)) {
    /* collect the "domains" strings into a NULL-terminated strv */
    array = g_ptr_array_new();
    while (g_variant_iter_next(iterp_domains, "&s", &str))
        g_ptr_array_add(array, str);
    g_ptr_array_add(array, NULL);
    domains = (char **) g_ptr_array_free(array, FALSE);
}

g_variant_lookup(entry_var, "interface", "&s", &interface);
g_variant_lookup(entry_var, "vpn", "b", &vpn);

entry = nm_dns_entry_new(interface,
                         (const char *const *) nameservers,
                         (const char *const *) domains,
                         priority,
                         vpn);
if (!entry) {
    g_warning("Ignoring invalid DNS entry");
    continue;
}

g_ptr_array_add(configuration_new, entry);
}
}
```
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receives a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Previously we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
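To make the GMainContext contract from the CHANGES list above concrete, here is a minimal caller sketch (hypothetical example code, not part of this file; it only assumes the public nm_client_new_async()/nm_client_new_finish() API):
```
#include <NetworkManager.h>

/* The NMClient is bound to the GMainContext that is thread-default at
 * construction time; the caller must keep iterating that context to
 * receive callbacks, libnm never iterates it on the caller's behalf. */
static void
client_ready(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GMainLoop *loop  = user_data;
    GError    *error = NULL;
    NMClient  *client;

    client = nm_client_new_finish(result, &error);
    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_error_free(error);
    } else
        g_object_unref(client);
    g_main_loop_quit(loop);
}

int
main(void)
{
    GMainLoop *loop = g_main_loop_new(NULL, FALSE);

    nm_client_new_async(NULL, client_ready, loop);
    g_main_loop_run(loop); /* the caller iterates its own context */
    g_main_loop_unref(loop);
    return 0;
}
```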
    configuration_old = priv->dns_manager.configuration;
    priv->dns_manager.configuration = g_steal_pointer(&configuration_new);
    return NML_DBUS_NOTIFY_UPDATE_PROP_FLAGS_NOTIFY;
}
/**
 * nm_client_get_capabilities:
 * @client: the #NMClient instance
 * @length: (out) (optional): the number of returned capabilities.
 *
 * Returns: (transfer none) (array length=length): the
 * list of capabilities reported by the server or %NULL
 * if the capabilities are unknown.
 * The numeric values correspond to #NMCapability enum.
 * The array is terminated by a numeric zero sentinel
 * at position @length.
 *
 * Since: 1.24
 */
const guint32 *
nm_client_get_capabilities(NMClient *client, gsize *length)
{
    NMClientPrivate *priv;

    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);

    priv = NM_CLIENT_GET_PRIVATE(client);

    NM_SET_OUT(length, priv->nm.capabilities_len);
    return priv->nm.capabilities_arr;
}
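As a usage sketch for this getter (hypothetical caller code, not part of this file; print_capabilities() is an invented name):
```
/* Enumerate the capabilities reported by the daemon. The returned array
 * is owned by @client and carries a zero sentinel at position @len. */
static void
print_capabilities(NMClient *client)
{
    const guint32 *caps;
    gsize          len;
    gsize          i;

    caps = nm_client_get_capabilities(client, &len);
    if (!caps) {
        g_print("capabilities unknown\n");
        return;
    }
    for (i = 0; i < len; i++)
        g_print("capability[%" G_GSIZE_FORMAT "] = %u\n", i, (guint) caps[i]);
}
```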
core: add "VersionInfo" property on D-Bus and NMClient
This exposes NM_VERSION as a number (in contrast to "Version", which is a
string). That is particularly useful because the number can be
compared with <> due to the encoding of the version.
While at it, don't make it a single number. Expose an array of numbers,
where the following numbers are a bitfield of capabilities.
Note that before commit 3c67a1ec5e8e ('cli: remove version check against
NM'), we used to parse the "Version" string to detect the version. As
such, the information that "VersionInfo" exposes now, was already
(somewhat) available, you just had to parse the string. The main benefit of
"VersionInfo" is that it can expose capabilities (patched behavior) in
in a lightweight bitfield. To include the numerical version there is
just useful on top.
Currently no additional capabilities are exposed. The idea is of course
to have a place in the future, where we can expose additional
capabilities. Adding a capability flag is most useful for behavior that we
backport to older branches. Otherwise, we could just check the daemon version
alone. But since we add the "VersionInfo" property only now, we cannot backport
any capability further than this, because the "VersionInfo" property itself
won't be backported. As such, this will only be useful in the future by having
a place where we can add (and backport) capabilities.
Note that there is some overlap with the existing "Capability" property
and NMCapability enum. The difference is that adding a capability via "VersionInfo"
is only one bit, and thus cheaper. Most importantly, having it cheaper means
the downsides of adding a capability flag are significantly reduced. In
practice, we could live without capabilities for a long time, so they
must be very cheap to be worth adding. Another difference is that
VersionInfo is meant to be about compile-time defaults (e.g.
a certain patch/behavior that is in or not), while NM_CAPABILITY_TEAM depends on
whether the team plugin is loaded at runtime.
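For concreteness, a worked instance of the encoding (hypothetical values, not from the commit message):
```
/* Daemon version 1.42.0 would be reported as
 *   (1 << 16) | (42 << 8) | 0  ==  0x012a00  ==  76288,
 * so versions order correctly under plain integer < and >. */
#define EXAMPLE_ENCODED_NM_VERSION_1_42_0 ((1u << 16) | (42u << 8) | 0u)
```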
/**
 * nm_client_get_version_info:
 * @client: the #NMClient instance
 * @length: (out): the number of returned capabilities.
 *
 * If available, the first element in the array is NM_VERSION which
 * encodes the daemon version as "(major << 16 | minor << 8 | micro)".
 * The following elements are a bitfield of %NMVersionInfoCapability
 * that indicate that the daemon supports a certain capability.
 *
 * Returns: (transfer none) (array length=length): the
 * list of capabilities reported by the server or %NULL
 * if the capabilities are unknown.
 *
 * Since: 1.42
 */
const guint32 *
nm_client_get_version_info(NMClient *client, gsize *length)
{
    NMClientPrivate *priv;

    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);
    g_return_val_if_fail(length, NULL);

    priv = NM_CLIENT_GET_PRIVATE(client);

    *length = priv->nm.version_info_len;
    return priv->nm.version_info_arr;
}
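A decoding sketch for the array returned here (hypothetical caller code, not part of this file; that each following element carries 32 capability bits is an assumption based on the description above):
```
/* Decode the version-info array: info[0] is the daemon version encoded as
 * (major << 16 | minor << 8 | micro); the remaining elements are assumed
 * to carry NMVersionInfoCapability bits, 32 per element. */
static void
print_version_info(NMClient *client)
{
    const guint32 *info;
    gsize          len;

    info = nm_client_get_version_info(client, &len);
    if (!info || len == 0)
        return;

    g_print("daemon version %u.%u.%u\n",
            info[0] >> 16,
            (info[0] >> 8) & 0xFFu,
            info[0] & 0xFFu);

    if (len > 1 && (info[1] & (1u << 0)))
        g_print("first capability bit is set\n");
}
```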
/* Helper for the "Capabilities" and "VersionInfo" D-Bus properties ("au"):
 * replaces the cached guint32 array with a copy of @value's content,
 * zero-terminated one past the @p_len elements. */
static NMLDBusNotifyUpdatePropFlags
_notify_update_prop_nm_au(guint32 **p_arr, gsize *p_len, GVariant *value)
{
    nm_clear_g_free(p_arr);
    *p_len = 0;

    if (value) {
        const guint32 *arr;
        gsize          len;

        arr    = g_variant_get_fixed_array(value, &len, sizeof(guint32));
        *p_len = len;
        *p_arr = g_new(guint32, len + 1);
        if (len > 0)
            memcpy(*p_arr, arr, len * sizeof(guint32));
        (*p_arr)[len] = 0;
    }

    return NML_DBUS_NOTIFY_UPDATE_PROP_FLAGS_NOTIFY;
}
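A minimal illustration of the helper's contract (hypothetical test-style code, not part of this file):
```
/* Feed an "au" GVariant through the helper: the cached array becomes a
 * newly allocated copy with a trailing zero sentinel. */
static void
example_au_update(void)
{
    const guint32 src[] = {7, 42};
    guint32      *arr   = NULL;
    gsize         len   = 0;
    GVariant     *value;

    value = g_variant_new_fixed_array(G_VARIANT_TYPE_UINT32,
                                      src,
                                      G_N_ELEMENTS(src),
                                      sizeof(guint32));
    g_variant_ref_sink(value);

    _notify_update_prop_nm_au(&arr, &len, value);
    /* now: len == 2, arr[0] == 7, arr[1] == 42, arr[2] == 0 (sentinel) */

    g_variant_unref(value);
    g_free(arr);
}
```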
static NMLDBusNotifyUpdatePropFlags
_notify_update_prop_nm_capabilities(NMClient                *self,
                                    NMLDBusObject           *dbobj,
                                    const NMLDBusMetaIface  *meta_iface,
                                    guint                    dbus_property_idx,
                                    GVariant                *value)
{
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);

    nm_assert(G_OBJECT(self) == dbobj->nmobj);

    return _notify_update_prop_nm_au(&priv->nm.capabilities_arr,
                                     &priv->nm.capabilities_len,
                                     value);
}
static NMLDBusNotifyUpdatePropFlags
_notify_update_prop_nm_version_info(NMClient                *self,
                                    NMLDBusObject           *dbobj,
                                    const NMLDBusMetaIface  *meta_iface,
                                    guint                    dbus_property_idx,
                                    GVariant                *value)
{
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);

    nm_assert(G_OBJECT(self) == dbobj->nmobj);

    return _notify_update_prop_nm_au(&priv->nm.version_info_arr,
                                     &priv->nm.version_info_len,
                                     value);
}
|
|
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check the RSS column
    in `ps aux`.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
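For reference, a long-running monitor client of the kind measured above boils
down to roughly the following sketch. This is illustrative only (`nmcli monitor`
itself does much more); it assumes nothing beyond public libnm API
(`nm_client_new()` and the `device-added` signal), and the `device_added()`
handler is a hypothetical example:
```c
/* Hypothetical minimal monitor client; for illustration, not part of libnm. */
#include <NetworkManager.h>

static void
device_added(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("device added: %s\n", nm_device_get_iface(device));
}

int
main(void)
{
    GError    *error  = NULL;
    NMClient  *client = nm_client_new(NULL, &error);
    GMainLoop *loop;

    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_clear_error(&error);
        return 1;
    }

    g_signal_connect(client, NM_CLIENT_DEVICE_ADDED, G_CALLBACK(device_added), NULL);

    /* The client's entire D-Bus object cache stays alive for as long as
     * the instance does; that is what the RSS numbers above measure. */
    loop = g_main_loop_new(NULL, FALSE);
    g_main_loop_run(loop);
    return 0;
}
```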
/*****************************************************************************/

/**
 * nm_client_get_checkpoints:
 * @client: a #NMClient
 *
 * Gets all the active checkpoints.
 *
 * Returns: (transfer none) (element-type NMCheckpoint): a #GPtrArray
 * containing all the #NMCheckpoint. The returned array is owned by the
 * #NMClient object and should not be modified.
 *
 * Since: 1.12
 **/
const GPtrArray *
nm_client_get_checkpoints(NMClient *client)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);
    return nml_dbus_property_ao_get_objs_as_ptrarray(
        &NM_CLIENT_GET_PRIVATE(client)->nm.property_ao[PROPERTY_AO_IDX_CHECKPOINTS]);
}
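A minimal sketch of how a caller might use this accessor (illustrative only,
not part of nm-client.c; it assumes a synchronously initialized client and
uses only documented libnm API):
```c
/* Hypothetical example caller of nm_client_get_checkpoints(). */
#include <NetworkManager.h>

int
main(void)
{
    GError          *error  = NULL;
    NMClient        *client = nm_client_new(NULL, &error);
    const GPtrArray *checkpoints;
    guint            i;

    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_clear_error(&error);
        return 1;
    }

    /* The array is owned by @client: do not modify or free it. */
    checkpoints = nm_client_get_checkpoints(client);
    for (i = 0; i < checkpoints->len; i++) {
        NMCheckpoint *checkpoint = checkpoints->pdata[i];

        g_print("checkpoint: %s\n", nm_object_get_path(NM_OBJECT(checkpoint)));
    }

    g_object_unref(client);
    return 0;
}
```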
/* Completion handler for the checkpoint-create D-Bus call: propagates either
 * the error or the new checkpoint's object path (extracted from the reply
 * below) through the provided GTask. */
static void
checkpoint_create_cb(GObject *object, GAsyncResult *result, gpointer user_data)
{
    gs_unref_object GTask     *task = user_data;
    gs_unref_variant GVariant *ret  = NULL;
    const char                *v_checkpoint_path;
    GError                    *error = NULL;

    ret = g_dbus_connection_call_finish(G_DBUS_CONNECTION(object), result, &error);
    if (!ret) {
        if (!nm_utils_error_is_cancelled(error))
            g_dbus_error_strip_remote_error(error);
        g_task_return_error(task, error);
        return;
    }
    g_variant_get(ret, "(&o)", &v_checkpoint_path);
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
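---
For reference, the checkpoint API whose implementation follows can be
driven like this. A minimal sketch: `client` is assumed to be an
initialized NMClient, `on_checkpoint_ready` is a placeholder callback
name, and the 60-second timeout is illustrative.
```
static void
on_checkpoint_ready(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError       *error = NULL;
    NMCheckpoint *checkpoint;

    checkpoint = nm_client_checkpoint_create_finish(NM_CLIENT(source), result, &error);
    if (!checkpoint) {
        g_printerr("checkpoint failed: %s\n", error->message);
        g_error_free(error);
        return;
    }
    g_print("created %s\n", nm_object_get_path(NM_OBJECT(checkpoint)));
    g_object_unref(checkpoint);
}

/* Checkpoint all devices (empty @devices) and roll back automatically
 * after 60 seconds unless the checkpoint is destroyed first. */
nm_client_checkpoint_create(client, NULL, 60, NM_CHECKPOINT_CREATE_FLAG_NONE,
                            NULL, on_checkpoint_ready, NULL);
```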
    _request_wait_start(g_steal_pointer(&task),
                        "CheckpointCreate",
                        NM_TYPE_CHECKPOINT,
                        v_checkpoint_path,
                        NULL);
}

/**
 * nm_client_checkpoint_create:
 * @client: the %NMClient
 * @devices: (element-type NMDevice): a list of devices for which a
 *   checkpoint should be created.
 * @rollback_timeout: the rollback timeout in seconds
 * @flags: creation flags
 * @cancellable: a #GCancellable, or %NULL
 * @callback: (scope async) (closure user_data): callback to be called when the operation completes
 * @user_data: caller-specific data passed to @callback
 *
 * Creates a checkpoint of the current networking configuration
 * for given interfaces. An empty @devices argument means all
 * devices. If @rollback_timeout is not zero, a rollback is
 * automatically performed after the given timeout.
 *
 * Since: 1.12
 **/
void
nm_client_checkpoint_create(NMClient *client,
                            const GPtrArray *devices,
                            guint32 rollback_timeout,
                            NMCheckpointCreateFlags flags,
                            GCancellable *cancellable,
                            GAsyncReadyCallback callback,
                            gpointer user_data)
{
    gs_free const char **paths = NULL;
    guint i;

    g_return_if_fail(NM_IS_CLIENT(client));

    if (devices && devices->len > 0) {
        paths = g_new(const char *, devices->len + 1);
        for (i = 0; i < devices->len; i++)
            paths[i] = nm_object_get_path(NM_OBJECT(devices->pdata[i]));
        paths[i] = NULL;
    }
    _nm_client_dbus_call(
        client,
        client,
        nm_client_checkpoint_create,
        cancellable,
        callback,
        user_data,
        NM_DBUS_PATH,
        NM_DBUS_INTERFACE,
        "CheckpointCreate",
        g_variant_new("(^aouu)", paths ?: NM_PTRARRAY_EMPTY(const char *), rollback_timeout, flags),
        G_VARIANT_TYPE("(o)"),
        G_DBUS_CALL_FLAGS_NONE,
        NM_DBUS_DEFAULT_TIMEOUT_MSEC,
        checkpoint_create_cb);
}
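
/* Note on the "(^aouu)" format above: GVariant's '^' prefix converts the
 * NULL-terminated strv of device object paths into a D-Bus "ao" (array of
 * object paths), followed by two "u" (guint32) values for the rollback
 * timeout and the create flags. A stand-alone sketch of the same tuple,
 * with a hypothetical device path:
 *
 *     const char *paths[] = {"/org/freedesktop/NetworkManager/Devices/1", NULL};
 *     GVariant   *args    = g_variant_new("(^aouu)", paths, 60u, 0u);
 */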

/**
 * nm_client_checkpoint_create_finish:
 * @client: the #NMClient
 * @result: the result passed to the #GAsyncReadyCallback
 * @error: location for a #GError, or %NULL
 *
 * Gets the result of a call to nm_client_checkpoint_create().
 *
 * Returns: (transfer full): the new #NMCheckpoint on success, %NULL on
 *   failure, in which case @error will be set.
 *
 * Since: 1.12
 **/
NMCheckpoint *
nm_client_checkpoint_create_finish(NMClient *client, GAsyncResult *result, GError **error)
{
    return NM_CHECKPOINT(
        _request_wait_finish(client, result, nm_client_checkpoint_create, NULL, error));
}

/**
 * nm_client_checkpoint_destroy:
 * @client: the %NMClient
 * @checkpoint_path: the D-Bus path for the checkpoint
 * @cancellable: a #GCancellable, or %NULL
 * @callback: (scope async) (closure user_data): callback to be called when the operation completes
 * @user_data: caller-specific data passed to @callback
 *
 * Destroys an existing checkpoint without performing a rollback.
 *
 * Since: 1.12
 **/
void
nm_client_checkpoint_destroy(NMClient *client,
                             const char *checkpoint_path,
                             GCancellable *cancellable,
                             GAsyncReadyCallback callback,
                             gpointer user_data)
{
    g_return_if_fail(NM_IS_CLIENT(client));
    g_return_if_fail(checkpoint_path && checkpoint_path[0] == '/');
    _nm_client_dbus_call(client,
                         client,
                         nm_client_checkpoint_destroy,
                         cancellable,
                         callback,
                         user_data,
                         NM_DBUS_PATH,
                         NM_DBUS_INTERFACE,
                         "CheckpointDestroy",
                         g_variant_new("(o)", checkpoint_path),
                         G_VARIANT_TYPE("()"),
                         G_DBUS_CALL_FLAGS_NONE,
                         NM_DBUS_DEFAULT_TIMEOUT_MSEC,
                         nm_dbus_connection_call_finish_void_strip_dbus_error_cb);
}

/**
 * nm_client_checkpoint_destroy_finish:
 * @client: an #NMClient
 * @result: the result passed to the #GAsyncReadyCallback
 * @error: location for a #GError, or %NULL
 *
 * Gets the result of a call to nm_client_checkpoint_destroy().
 *
 * Returns: %TRUE on success or %FALSE on failure, in which case
 *   @error will be set.
 *
 * Since: 1.12
 **/
gboolean
nm_client_checkpoint_destroy_finish(NMClient *client, GAsyncResult *result, GError **error)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
    g_return_val_if_fail(nm_g_task_is_valid(result, client, nm_client_checkpoint_destroy), FALSE);

    return g_task_propagate_boolean(G_TASK(result), error);
}
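
/* Usage sketch for the destroy call above. Assumptions: `client` is an
 * initialized NMClient, `checkpoint_path` was obtained from a previous
 * nm_client_checkpoint_create() call, and `on_destroy_done` is a
 * placeholder callback name:
 *
 *     static void
 *     on_destroy_done(GObject *source, GAsyncResult *result, gpointer user_data)
 *     {
 *         GError *error = NULL;
 *
 *         if (!nm_client_checkpoint_destroy_finish(NM_CLIENT(source), result, &error)) {
 *             g_printerr("checkpoint destroy failed: %s\n", error->message);
 *             g_error_free(error);
 *         }
 *     }
 *
 *     nm_client_checkpoint_destroy(client, checkpoint_path, NULL, on_destroy_done, NULL);
 */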

/**
 * nm_client_checkpoint_rollback:
 * @client: the %NMClient
 * @checkpoint_path: the D-Bus path to the checkpoint
 * @cancellable: a #GCancellable, or %NULL
 * @callback: (scope async) (closure user_data): callback to be called when the operation completes
 * @user_data: caller-specific data passed to @callback
 *
 * Performs the rollback of a checkpoint before the timeout is reached.
 *
 * Since: 1.12
 **/
void
nm_client_checkpoint_rollback(NMClient *client,
                              const char *checkpoint_path,
                              GCancellable *cancellable,
                              GAsyncReadyCallback callback,
                              gpointer user_data)
{
    g_return_if_fail(NM_IS_CLIENT(client));
    g_return_if_fail(checkpoint_path && checkpoint_path[0] == '/');
    _nm_client_dbus_call(client,
                         client,
                         nm_client_checkpoint_rollback,
                         cancellable,
                         callback,
                         user_data,
                         NM_DBUS_PATH,
                         NM_DBUS_INTERFACE,
                         "CheckpointRollback",
                         g_variant_new("(o)", checkpoint_path),
                         G_VARIANT_TYPE("(a{su})"),
                         G_DBUS_CALL_FLAGS_NONE,
                         NM_DBUS_DEFAULT_TIMEOUT_MSEC,
                         nm_dbus_connection_call_finish_variant_strip_dbus_error_cb);
}

/**
 * nm_client_checkpoint_rollback_finish:
 * @client: an #NMClient
 * @result: the result passed to the #GAsyncReadyCallback
 * @error: location for a #GError, or %NULL
 *
 * Gets the result of a call to nm_client_checkpoint_rollback().
 *
 * Returns: (transfer full) (element-type utf8 guint32): a hash table of
 * devices and results. Devices are represented by their original
 * D-Bus path; each result is a #NMRollbackResult.
 *
 * Since: 1.12
 **/
GHashTable *
nm_client_checkpoint_rollback_finish(NMClient *client, GAsyncResult *result, GError **error)
{
    gs_unref_variant GVariant *ret      = NULL;
    gs_unref_variant GVariant *v_result = NULL;
    GVariantIter               iter;
    GHashTable                *hash;
    const char                *path;
    guint32                    r;

    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);
    g_return_val_if_fail(nm_g_task_is_valid(result, client, nm_client_checkpoint_rollback), NULL);

    ret = g_task_propagate_pointer(G_TASK(result), error);
    if (!ret)
        return NULL;

    g_variant_get(ret, "(@a{su})", &v_result);

    hash = g_hash_table_new_full(nm_str_hash, g_str_equal, g_free, NULL);

    g_variant_iter_init(&iter, v_result);
    while (g_variant_iter_next(&iter, "{&su}", &path, &r))
        g_hash_table_insert(hash, g_strdup(path), GUINT_TO_POINTER(r));

    return hash;
}

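/*
 * A minimal sketch (illustrative only, not part of libnm) of triggering a
 * rollback and consuming the per-device result table. The table maps the
 * original D-Bus path of each device to a #NMRollbackResult code.
 * "example_rollback_done_cb" and "example_rollback" are hypothetical.
 */
static void
example_rollback_done_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError        *error = NULL;
    GHashTable    *results;
    GHashTableIter iter;
    gpointer       key;
    gpointer       value;

    results = nm_client_checkpoint_rollback_finish(NM_CLIENT(source), result, &error);
    if (!results) {
        g_warning("rollback failed: %s", error->message);
        g_error_free(error);
        return;
    }

    g_hash_table_iter_init(&iter, results);
    while (g_hash_table_iter_next(&iter, &key, &value)) {
        /* NM_ROLLBACK_RESULT_OK (0) indicates success for this device. */
        g_print("device %s: rollback result %u\n",
                (const char *) key,
                GPOINTER_TO_UINT(value));
    }

    /* The table is (transfer full): the caller owns it and must release it. */
    g_hash_table_unref(results);
}

static void
example_rollback(NMClient *client, const char *checkpoint_path)
{
    nm_client_checkpoint_rollback(client,
                                  checkpoint_path,
                                  NULL /* cancellable */,
                                  example_rollback_done_cb,
                                  NULL /* user_data */);
}
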
/**
 * nm_client_checkpoint_adjust_rollback_timeout:
 * @client: the %NMClient
 * @checkpoint_path: a D-Bus path to a checkpoint
 * @add_timeout: the timeout in seconds counting from now.
 *   Set to zero to disable the timeout.
 * @cancellable: a #GCancellable, or %NULL
 * @callback: (scope async) (closure user_data): callback to be called when the operation completes
 * @user_data: caller-specific data passed to @callback
 *
 * Resets the timeout for the checkpoint with path @checkpoint_path
 * to @add_timeout.
 *
 * Since: 1.12
 **/
void
nm_client_checkpoint_adjust_rollback_timeout(NMClient           *client,
                                             const char         *checkpoint_path,
                                             guint32             add_timeout,
                                             GCancellable       *cancellable,
                                             GAsyncReadyCallback callback,
                                             gpointer            user_data)
{
    g_return_if_fail(NM_IS_CLIENT(client));
    g_return_if_fail(checkpoint_path && checkpoint_path[0] == '/');

    _nm_client_dbus_call(client,
                         client,
                         nm_client_checkpoint_adjust_rollback_timeout,
                         cancellable,
                         callback,
                         user_data,
                         NM_DBUS_PATH,
                         NM_DBUS_INTERFACE,
                         "CheckpointAdjustRollbackTimeout",
                         g_variant_new("(ou)", checkpoint_path, add_timeout),
                         G_VARIANT_TYPE("()"),
                         G_DBUS_CALL_FLAGS_NONE,
                         NM_DBUS_DEFAULT_TIMEOUT_MSEC,
                         nm_dbus_connection_call_finish_void_strip_dbus_error_cb);
}

/**
 * nm_client_checkpoint_adjust_rollback_timeout_finish:
 * @client: an #NMClient
 * @result: the result passed to the #GAsyncReadyCallback
 * @error: location for a #GError, or %NULL
 *
 * Gets the result of a call to nm_client_checkpoint_adjust_rollback_timeout().
 *
 * Returns: %TRUE on success or %FALSE on failure.
 *
 * Since: 1.12
 **/
gboolean
nm_client_checkpoint_adjust_rollback_timeout_finish(NMClient     *client,
                                                    GAsyncResult *result,
                                                    GError      **error)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
    g_return_val_if_fail(
        nm_g_task_is_valid(result, client, nm_client_checkpoint_adjust_rollback_timeout),
        FALSE);

    return g_task_propagate_boolean(G_TASK(result), error);
}

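/*
 * A minimal sketch (illustrative only, not part of libnm): keep an existing
 * checkpoint alive by restarting its rollback countdown, e.g. after each
 * successful step of a remote reconfiguration. "example_bump_checkpoint_timeout"
 * and its callback are hypothetical.
 */
static void
example_timeout_adjusted_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError *error = NULL;

    if (!nm_client_checkpoint_adjust_rollback_timeout_finish(NM_CLIENT(source),
                                                             result,
                                                             &error)) {
        g_warning("adjusting rollback timeout failed: %s", error->message);
        g_error_free(error);
    }
}

static void
example_bump_checkpoint_timeout(NMClient *client, const char *checkpoint_path)
{
    /* Roll back automatically 120 seconds from now (0 would disable the timeout). */
    nm_client_checkpoint_adjust_rollback_timeout(client,
                                                 checkpoint_path,
                                                 120,
                                                 NULL /* cancellable */,
                                                 example_timeout_adjusted_cb,
                                                 NULL /* user_data */);
}
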
/**
 * nm_client_reload:
 * @client: the %NMClient
 * @flags: flags indicating what to reload.
 * @cancellable: a #GCancellable, or %NULL
 * @callback: (scope async) (closure user_data): callback to be called when the reload operation completes
 * @user_data: caller-specific data passed to @callback
 *
 * Reload NetworkManager's configuration and perform certain updates, like
 * flushing caches or rewriting external state to disk. This is similar to
 * sending SIGHUP to NetworkManager, but it allows for more fine-grained
 * control over what to reload (see @flags). It also allows non-root access
 * via PolicyKit and, unlike signals, it is synchronous.
 *
 * Since: 1.22
 **/
void
nm_client_reload(NMClient            *client,
                 NMManagerReloadFlags flags,
                 GCancellable        *cancellable,
                 GAsyncReadyCallback  callback,
                 gpointer             user_data)
{
    g_return_if_fail(NM_IS_CLIENT(client));

libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    _nm_client_dbus_call(client,
                         client,
                         nm_client_reload,
                         cancellable,
                         callback,
                         user_data,
                         NM_DBUS_PATH,
                         NM_DBUS_INTERFACE,
                         "Reload",
                         g_variant_new("(u)", (guint32) flags),
                         G_VARIANT_TYPE("()"),
                         G_DBUS_CALL_FLAGS_NONE,
                         NM_DBUS_DEFAULT_TIMEOUT_MSEC,
                         nm_dbus_connection_call_finish_void_strip_dbus_error_cb);
}

/**
 * nm_client_reload_finish:
 * @client: an #NMClient
 * @result: the result passed to the #GAsyncReadyCallback
 * @error: location for a #GError, or %NULL
 *
 * Gets the result of a call to nm_client_reload().
 *
 * Returns: %TRUE on success or %FALSE on failure.
 *
 * Since: 1.22
 **/
gboolean
nm_client_reload_finish(NMClient *client, GAsyncResult *result, GError **error)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
    g_return_val_if_fail(nm_g_task_is_valid(result, client, nm_client_reload), FALSE);

    return g_task_propagate_boolean(G_TASK(result), error);
}

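/*
 * Illustrative usage sketch (not part of libnm): issue a reload of all of
 * NetworkManager's configuration and pick up the result in a callback.
 * The function names below and the flags value 0 (which requests a full
 * reload) are assumptions of this sketch.
 */
static void
_example_reload_done(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError *error = NULL;

    if (!nm_client_reload_finish(NM_CLIENT(source), result, &error)) {
        g_printerr("reload failed: %s\n", error->message);
        g_clear_error(&error);
    }
}

static void
_example_reload(NMClient *client)
{
    /* flags 0: reload everything; no cancellable, no user data. */
    nm_client_reload(client, 0, NULL, _example_reload_done, NULL);
}
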
/*****************************************************************************/

/**
 * nm_client_dbus_call:
 * @client: the #NMClient
 * @object_path: path of remote object
 * @interface_name: D-Bus interface to invoke method on
 * @method_name: the name of the method to invoke
 * @parameters: (nullable): a #GVariant tuple with parameters for the method
 *   or %NULL if not passing parameters
 * @reply_type: (nullable): the expected type of the reply (which will be a
 *   tuple), or %NULL
 * @timeout_msec: the timeout in milliseconds, -1 to use the default
 *   timeout or %G_MAXINT for no timeout
 * @cancellable: (nullable): a #GCancellable or %NULL
 * @callback: (nullable): a #GAsyncReadyCallback to call when the request
 *   is satisfied or %NULL if you don't care about the result of the
 *   method invocation
 * @user_data: the data to pass to @callback
 *
 * Call g_dbus_connection_call() on the current name owner with the specified
 * arguments. Most importantly, this invokes g_dbus_connection_call() with the
 * client's #GMainContext, so that the response is always in order with other
 * D-Bus events. Of course, the call uses #GTask and will invoke the
 * callback on the current g_main_context_get_thread_default().
 *
 * This API is merely a convenient wrapper for g_dbus_connection_call(). You can
 * also use g_dbus_connection_call() directly, with the same effect.
 *
 * Since: 1.24
 **/
void
nm_client_dbus_call(NMClient *client,
                    const char *object_path,
                    const char *interface_name,
                    const char *method_name,
                    GVariant *parameters,
                    const GVariantType *reply_type,
                    int timeout_msec,
                    GCancellable *cancellable,
                    GAsyncReadyCallback callback,
                    gpointer user_data)
{
    g_return_if_fail(NM_IS_CLIENT(client));

    _nm_client_dbus_call(client,
                         client,
                         nm_client_dbus_call,
                         cancellable,
                         callback,
                         user_data,
                         object_path,
                         interface_name,
                         method_name,
                         parameters,
                         reply_type,
                         G_DBUS_CALL_FLAGS_NONE,
                         timeout_msec == -1 ? NM_DBUS_DEFAULT_TIMEOUT_MSEC : timeout_msec,
                         nm_dbus_connection_call_finish_variant_cb);
}

/**
 * nm_client_dbus_call_finish:
 * @client: the #NMClient instance
 * @result: the result passed to the #GAsyncReadyCallback
 * @error: location for a #GError, or %NULL
 *
 * Gets the result of a call to nm_client_dbus_call().
 *
 * Returns: (transfer full): the result #GVariant or %NULL on error.
 *
 * Since: 1.24
 **/
GVariant *
nm_client_dbus_call_finish(NMClient *client, GAsyncResult *result, GError **error)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), NULL);
    g_return_val_if_fail(nm_g_task_is_valid(result, client, nm_client_dbus_call), NULL);

    return g_task_propagate_pointer(G_TASK(result), error);
}
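
/*
 * Illustrative usage sketch (not part of libnm): invoke GetAppliedConnection
 * on a device via the generic D-Bus wrapper. The callback name and the
 * example object path are assumptions of this sketch.
 */
static void
_example_got_applied(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GVariant *ret;
    GError *error = NULL;

    ret = nm_client_dbus_call_finish(NM_CLIENT(source), result, &error);
    if (!ret) {
        g_printerr("call failed: %s\n", error->message);
        g_clear_error(&error);
        return;
    }
    g_variant_unref(ret);
}

static void
_example_call(NMClient *client)
{
    nm_client_dbus_call(client,
                        "/org/freedesktop/NetworkManager/Devices/1",
                        NM_DBUS_INTERFACE_DEVICE,
                        "GetAppliedConnection",
                        g_variant_new("(u)", (guint32) 0),
                        G_VARIANT_TYPE("(a{sa{sv}}t)"),
                        -1,
                        NULL,
                        _example_got_applied,
                        NULL);
}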

/*****************************************************************************/

/**
 * nm_client_dbus_set_property:
 * @client: the #NMClient
 * @object_path: path of remote object
 * @interface_name: D-Bus interface for the property to set
 * @property_name: the name of the property to set
 * @value: a #GVariant with the value to set
 * @timeout_msec: the timeout in milliseconds, -1 to use the default
 *   timeout or %G_MAXINT for no timeout
 * @cancellable: (nullable): a #GCancellable or %NULL
 * @callback: (nullable): a #GAsyncReadyCallback to call when the request
 *   is satisfied or %NULL if you don't care about the result of the
 *   method invocation
 * @user_data: the data to pass to @callback
 *
 * Like nm_client_dbus_call() but calls "Set" on the standard
 * "org.freedesktop.DBus.Properties" D-Bus interface.
 *
 * Since: 1.24
 **/
void
nm_client_dbus_set_property(NMClient *client,
                            const char *object_path,
                            const char *interface_name,
                            const char *property_name,
                            GVariant *value,
                            int timeout_msec,
                            GCancellable *cancellable,
                            GAsyncReadyCallback callback,
                            gpointer user_data)
{
    g_return_if_fail(NM_IS_CLIENT(client));
    g_return_if_fail(interface_name);
    g_return_if_fail(property_name);
    g_return_if_fail(value);

    _nm_client_dbus_call(client,
                         client,
                         nm_client_dbus_set_property,
                         cancellable,
                         callback,
                         user_data,
                         object_path,
                         DBUS_INTERFACE_PROPERTIES,
                         "Set",
                         g_variant_new("(ssv)", interface_name, property_name, value),
                         G_VARIANT_TYPE("()"),
                         G_DBUS_CALL_FLAGS_NONE,
                         timeout_msec == -1 ? NM_DBUS_DEFAULT_TIMEOUT_MSEC : timeout_msec,
                         nm_dbus_connection_call_finish_void_cb);
}

/**
 * nm_client_dbus_set_property_finish:
 * @client: the #NMClient instance
 * @result: the result passed to the #GAsyncReadyCallback
 * @error: location for a #GError, or %NULL
 *
 * Gets the result of a call to nm_client_dbus_set_property().
 *
 * Returns: %TRUE on success or %FALSE on failure.
 *
 * Since: 1.24
 **/
gboolean
nm_client_dbus_set_property_finish(NMClient *client, GAsyncResult *result, GError **error)
{
    g_return_val_if_fail(NM_IS_CLIENT(client), FALSE);
    g_return_val_if_fail(nm_g_task_is_valid(result, client, nm_client_dbus_set_property), FALSE);

    return g_task_propagate_boolean(G_TASK(result), error);
}
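
/*
 * Illustrative usage sketch (not part of libnm): disable autoconnect on a
 * device by setting the "Autoconnect" property of the Device D-Bus
 * interface. The function names are assumptions of this sketch.
 */
static void
_example_set_done(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError *error = NULL;

    if (!nm_client_dbus_set_property_finish(NM_CLIENT(source), result, &error)) {
        g_printerr("Set failed: %s\n", error->message);
        g_clear_error(&error);
    }
}

static void
_example_set_autoconnect(NMClient *client, NMDevice *device)
{
    nm_client_dbus_set_property(client,
                                nm_object_get_path(NM_OBJECT(device)),
                                NM_DBUS_INTERFACE_DEVICE,
                                "Autoconnect",
                                g_variant_new_boolean(FALSE),
                                -1,
                                NULL,
                                _example_set_done,
                                NULL);
}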

/*****************************************************************************/

static void
_init_fetch_all(NMClient *self)
{
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check the RSS size in `ps aux`.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
nm_auto_pop_gmaincontext GMainContext *dbus_context = NULL;
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
dbus_context = nm_g_main_context_push_thread_default_if_necessary(priv->dbus_context);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
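The two annotated source lines above form a scoped push of the client's internal D-Bus context as the thread-default GMainContext: the `nm_auto_pop_gmaincontext` attribute pops the context again when `dbus_context` leaves scope, and the helper only pushes when the context is not already the thread default. Both helpers are libnm internals, so the following plain-GLib rendering of the pattern is a sketch under that assumption:
```c
#include <gio/gio.h>

static void
example_with_dbus_context (GMainContext *dbus_context)
{
    GMainContext *pushed = NULL;

    /* Push only if it is not already the thread-default context,
     * mirroring nm_g_main_context_push_thread_default_if_necessary(). */
    if (g_main_context_get_thread_default () != dbus_context) {
        g_main_context_push_thread_default (dbus_context);
        pushed = dbus_context;
    }

    /* ... any GSource attached or GDBusConnection call issued here is
     * dispatched on dbus_context, not on the caller's context ... */

    /* This is what nm_auto_pop_gmaincontext does automatically at
     * scope exit. */
    if (pushed)
        g_main_context_pop_thread_default (pushed);
}
```
This is how the refactoring avoids iterating or polluting the caller's context: the work happens in the pushed context, and the pop runs on every scope exit, including early returns.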
2019-10-30 11:42:58 +01:00
|
|
|
NML_NMCLIENT_LOG_D(self, "fetch all");
|
2020-09-28 16:03:33 +02:00
|
|
|
|
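`NML_NMCLIENT_LOG_D()` above emits one of the client debug messages that, per the commit message, are enabled with `LIBNM_CLIENT_DEBUG=trace` (and can be escalated via `LIBNM_CLIENT_DEBUG=warning,error` together with `G_DEBUG=fatal-warnings`). As a hypothetical illustration only, an env-gated debug macro could look like this; the name, arguments, and output format are invented, not libnm's implementation:
```c
#include <string.h>
#include <glib.h>

/* Print the message only when LIBNM_CLIENT_DEBUG contains "trace"
 * (a deliberate simplification of real level parsing). */
#define EXAMPLE_LOG_D(fmt, ...)                                       \
    G_STMT_START                                                      \
    {                                                                 \
        const char *_lvl = g_getenv ("LIBNM_CLIENT_DEBUG");           \
                                                                      \
        if (_lvl && strstr (_lvl, "trace"))                           \
            g_printerr ("libnm-example: " fmt "\n", ##__VA_ARGS__);   \
    }                                                                 \
    G_STMT_END

/* Usage, matching the annotated line above:
 *     EXAMPLE_LOG_D ("fetch all");
 */
```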
2019-10-30 11:42:58 +01:00
|
|
|
nm_assert(!priv->get_managed_objects_cancellable);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
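The `nm_assert()` above guards the `GetManagedObjects()` request: a new request must not start while a previous one is still pending. A minimal sketch of that invariant, assuming a private struct that stores the pending call's `GCancellable` (the field and function names here are illustrative):
```c
#include <gio/gio.h>

typedef struct {
    GCancellable *get_managed_objects_cancellable;
} ExamplePriv;

static void
example_start_get_managed_objects (ExamplePriv *priv)
{
    /* A still-pending request would have left its cancellable here;
     * asserting the slot is empty catches overlapping requests. */
    g_assert (!priv->get_managed_objects_cancellable);

    priv->get_managed_objects_cancellable = g_cancellable_new ();

    /* ... pass the cancellable to g_dbus_connection_call() and clear
     * the field (unreffing the cancellable) in the async callback ... */
}
```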
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and watch `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
    /* Cancellable for the pending GetManagedObjects() request. */
    priv->get_managed_objects_cancellable = g_cancellable_new();

    /* Watch the ObjectManager signals of the name owner under
     * "/org/freedesktop". */
    priv->dbsid_nm_object_manager =
        nm_dbus_connection_signal_subscribe_object_manager(priv->dbus_connection,
                                                           priv->name_owner,
                                                           "/org/freedesktop",
                                                           NULL,
                                                           _dbus_managed_objects_changed_cb,
                                                           self,
                                                           NULL);
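The nm_dbus_connection_signal_subscribe_object_manager() helper above is
presumably a thin convenience wrapper over the plain GDBus API. A minimal
sketch of the equivalent raw subscription, assuming the standard
org.freedesktop.DBus.ObjectManager interface (the function name below is
illustrative, not the helper's actual implementation):
```c
#include <gio/gio.h>

/* Subscribe to both ObjectManager signals ("InterfacesAdded" and
 * "InterfacesRemoved") emitted by @name_owner at @object_path. */
static guint
subscribe_object_manager_sketch(GDBusConnection    *connection,
                                const char         *name_owner,
                                const char         *object_path,
                                GDBusSignalCallback callback,
                                gpointer            user_data)
{
    return g_dbus_connection_signal_subscribe(connection,
                                              name_owner,
                                              "org.freedesktop.DBus.ObjectManager",
                                              NULL, /* member: match both signals */
                                              object_path,
                                              NULL, /* arg0: no filter */
                                              G_DBUS_SIGNAL_FLAGS_NONE,
                                              callback,
                                              user_data,
                                              NULL);
}
```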
2019-10-30 11:42:58 +01:00
    /* Watch PropertiesChanged signals from the name owner (the NULL
     * arguments presumably leave the path/interface filters open). */
    priv->dbsid_dbus_properties_properties_changed =
        nm_dbus_connection_signal_subscribe_properties_changed(priv->dbus_connection,
                                                               priv->name_owner,
                                                               NULL,
                                                               NULL,
                                                               _dbus_properties_changed_cb,
                                                               self,
                                                               NULL);
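The _dbus_*_cb handlers registered here follow GDBus' signal-callback shape.
A minimal sketch of that prototype, assuming the nm_* subscribe helpers
ultimately forward to g_dbus_connection_signal_subscribe() (the handler name
and body are illustrative):
```c
#include <gio/gio.h>

/* Matches GDBusSignalCallback, the callback type expected by
 * g_dbus_connection_signal_subscribe(). */
static void
example_signal_cb(GDBusConnection *connection,
                  const char      *sender_name,
                  const char      *object_path,
                  const char      *interface_name,
                  const char      *signal_name,
                  GVariant        *parameters,
                  gpointer         user_data)
{
    /* NMClient-style code would unpack @parameters here and enqueue the
     * change for ordered processing. */
}
```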
2019-10-30 11:42:58 +01:00
    /* Watch the Settings.Connection "Updated" signal from the name owner. */
    priv->dbsid_nm_settings_connection_updated =
        g_dbus_connection_signal_subscribe(priv->dbus_connection,
                                           priv->name_owner,
                                           NM_DBUS_INTERFACE_SETTINGS_CONNECTION,
                                           "Updated",
                                           NULL,
                                           NULL,
                                           G_DBUS_SIGNAL_FLAGS_NONE,
                                           _dbus_settings_updated_cb,
                                           self,
                                           NULL);
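For symmetry, a hedged sketch of the matching teardown. The helper below is
illustrative, not this file's actual cleanup code; the one real API used is
g_dbus_connection_signal_unsubscribe(), which releases a subscription ID such
as the priv->dbsid_* values stored above:
```c
#include <gio/gio.h>

/* Release a signal-subscription ID obtained from
 * g_dbus_connection_signal_subscribe() and reset it to 0. */
static void
clear_signal_subscription(GDBusConnection *connection, guint *dbsid)
{
    if (*dbsid != 0) {
        g_dbus_connection_signal_unsubscribe(connection, *dbsid);
        *dbsid = 0;
    }
}
```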
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify::devices" and
  "device-added" signals. I think it makes the most sense to emit
  "device-removed" first, then "notify::devices", and finally "device-added"
  (see the sketch after this list).
  This changes behavior from commit 52ae28f6e5bf ('libnm: queue
  added/removed signals and suppress uninitialized notifications'),
  but I don't think users should actually rely on the order. Still,
  the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
  "connection.permissions". Such profiles are hidden from NMClient's
  nm_client_get_connections() and from the "connection-added"/"connection-removed"
  signals.
  Note that NMActiveConnection's nm_active_connection_get_connection()
  and NMDevice's nm_device_get_available_connections() still expose such
  hidden NMRemoteConnection instances. This behavior was preserved.
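Seen from a client, the last two points might look roughly like the following
minimal sketch. It uses only public libnm API; the callback names and the
overall program are illustrative, and it assumes a running NetworkManager:
```c
#include <NetworkManager.h>

static void
device_removed_cb(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("device-removed: %s\n", nm_device_get_iface(device));
}

static void
devices_notify_cb(GObject *object, GParamSpec *pspec, gpointer user_data)
{
    /* With the new order, this fires after "device-removed" and
     * before "device-added". */
    g_print("notify::devices\n");
}

static void
device_added_cb(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("device-added: %s\n", nm_device_get_iface(device));
}

int
main(void)
{
    GError          *error = NULL;
    NMClient        *client;
    const GPtrArray *connections;
    GMainLoop       *loop;
    guint            i;

    client = nm_client_new(NULL, &error);
    if (!client) {
        g_printerr("could not create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    /* Profiles hidden via "connection.permissions" do not show up here and
     * emit no "connection-added"/"connection-removed" signals. */
    connections = nm_client_get_connections(client);
    for (i = 0; i < connections->len; i++)
        g_print("profile: %s\n",
                nm_connection_get_id(NM_CONNECTION(g_ptr_array_index(connections, i))));

    g_signal_connect(client, "device-removed", G_CALLBACK(device_removed_cb), NULL);
    g_signal_connect(client, "notify::devices", G_CALLBACK(devices_notify_cb), NULL);
    g_signal_connect(client, "device-added", G_CALLBACK(device_added_cb), NULL);

    loop = g_main_loop_new(NULL, FALSE);
    g_main_loop_run(loop);
    return 0;
}
```
Running such a client with LIBNM_CLIENT_DEBUG=trace (or with
LIBNM_CLIENT_DEBUG=warning,error together with G_DEBUG=fatal-warnings)
surfaces the cache handling described above.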
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
    on Fedora 31 generating wrong ELF notes. Usually libnm is smaller, but
    in these tests it had large (and bogus) ELF notes. Anyway, the point
    is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
    of profiles (551 profiles and 515 devices, mostly unrealized). This
    setup is already at the edge of what NetworkManager can currently
    handle. Of course, that is a different issue. Here we just check how
    long a plain `nmcli` invocation takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
    the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
/* Subscribe to the "StateChanged" signal on all
 * org.freedesktop.NetworkManager.Connection.Active objects exported by the
 * current name owner. */
priv->dbsid_nm_connection_active_state_changed =
    g_dbus_connection_signal_subscribe(priv->dbus_connection,
                                       priv->name_owner,
                                       NM_DBUS_INTERFACE_ACTIVE_CONNECTION,
                                       "StateChanged",
                                       NULL, /* object path: match any */
                                       NULL, /* arg0: no filter */
                                       G_DBUS_SIGNAL_FLAGS_NONE,
                                       _dbus_nm_connection_active_state_changed_cb,
                                       self,
                                       NULL);
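The callback registered above has to match GLib's GDBusSignalCallback
signature. A sketch of its expected shape (the body is an assumption; the real
handler feeds the change into NMClient's change tracking described above):
```c
#include <gio/gio.h>

static void
_dbus_nm_connection_active_state_changed_cb(GDBusConnection *connection,
                                            const char      *sender_name,
                                            const char      *object_path,
                                            const char      *interface_name,
                                            const char      *signal_name,
                                            GVariant        *parameters,
                                            gpointer         user_data)
{
    guint32 state;
    guint32 reason;

    /* "StateChanged" on Connection.Active carries "(uu)": the new state
     * and the reason. Only accept the expected D-Bus type. */
    if (g_variant_is_of_type(parameters, G_VARIANT_TYPE("(uu)"))) {
        g_variant_get(parameters, "(uu)", &state, &reason);
        /* Sketch: the real implementation queues this change and commits
         * it together with other pending changes. */
    }
}
```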
/* Similarly, subscribe to the "VpnStateChanged" signal on all
 * org.freedesktop.NetworkManager.VPN.Connection objects exported by the
 * current name owner. */
priv->dbsid_nm_vpn_connection_state_changed =
    g_dbus_connection_signal_subscribe(priv->dbus_connection,
                                       priv->name_owner,
                                       NM_DBUS_INTERFACE_VPN_CONNECTION,
                                       "VpnStateChanged",
                                       NULL, /* object path: match any */
                                       NULL, /* arg0: no filter */
                                       G_DBUS_SIGNAL_FLAGS_NONE,
                                       _dbus_nm_vpn_connection_state_changed_cb,
                                       self,
                                       NULL);
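Each subscription id is kept in priv so that it can be released again. A
sketch of the matching teardown, assuming the id is cleared when the
subscription is dropped:
```c
if (priv->dbsid_nm_vpn_connection_state_changed != 0) {
    g_dbus_connection_signal_unsubscribe(priv->dbus_connection,
                                         priv->dbsid_nm_vpn_connection_state_changed);
    priv->dbsid_nm_vpn_connection_state_changed = 0;
}
```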
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from the client
  side during poke_wireless_devices_with_rf_status(). The content of the cache
  should be determined by D-Bus alone and follow what the NetworkManager
  service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
  various subtle changes in behavior. Those should all be for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
  need more glue code than before. Previously we constructed NMPropertiesInfo and
  had a large amount of code to propagate properties from NMDBus* to NMObject.
  That got completely reworked, but did not fundamentally change: you still need
  about the same effort to create the NMLDBusMetaIface. Not using
  generated bindings did not make anything worse (which says something about the
  usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
  avoids copying it around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
  have compile-time checks to ensure the property types match. They are pretty
  hard to misuse, because a mismatch won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
  Note that such "assertion failures" might also happen because of a server
  bug (or change). Thus these are not ordinary assertions that indicate a bug
  in libnm, and they are not armed unless explicitly requested. In our CI we
  should now always run with LIBNM_CLIENT_DEBUG=warning,error and
  G_DEBUG=fatal-warnings to catch bugs. Note that currently
  NetworkManager has bugs in this regard, so enabling this will result in
  assertion failures. Those should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
  "connection.permissions". Such profiles are hidden by NMClient's
  nm_client_get_connections() and by its "connection-added"/"connection-removed"
  signals.
  Note that NMActiveConnection's nm_active_connection_get_connection()
  and NMDevice's nm_device_get_available_connections() still expose such
  hidden NMRemoteConnection instances. This behavior was preserved.
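  Below is a minimal sketch of the GMainContext contract described above. It
  is illustrative only (not code from this patch); it uses only public
  GLib/libnm API, and the worker_thread()/client_ready_cb() names are made up
  for the example:
  ```
  #include <NetworkManager.h>

  static void
  client_ready_cb(GObject *source, GAsyncResult *result, gpointer user_data)
  {
      GMainLoop *loop  = user_data;
      GError    *error = NULL;
      NMClient  *client;

      client = nm_client_new_finish(result, &error);
      if (!client) {
          g_printerr("could not create NMClient: %s\n", error->message);
          g_clear_error(&error);
      } else {
          g_print("NetworkManager version: %s\n", nm_client_get_version(client));
          g_object_unref(client);
      }
      g_main_loop_quit(loop);
  }

  static gpointer
  worker_thread(gpointer user_data)
  {
      /* NMClient sticks to the context that is thread-default at construction
       * time; its callbacks are only invoked while this context is iterated,
       * never via g_main_context_default(). */
      GMainContext *context = g_main_context_new();
      GMainLoop    *loop;

      g_main_context_push_thread_default(context);
      loop = g_main_loop_new(context, FALSE);
      nm_client_new_async(NULL, client_ready_cb, loop);
      g_main_loop_run(loop);
      g_main_loop_unref(loop);
      g_main_context_pop_thread_default(context);
      g_main_context_unref(context);
      return NULL;
  }

  int
  main(void)
  {
      GThread *thread = g_thread_new("nm-worker", worker_thread, NULL);

      g_thread_join(thread);
      return 0;
  }
  ```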
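  And a toy illustration of the unpack/apply/commit pattern described above,
  reduced to plain GObject. The ToyCache type and the toy_cache_*() helpers
  are hypothetical and not NMClient internals; the real implementation
  differs, but the ordering idea is the same:
  ```
  #include <glib-object.h>

  typedef struct {
      GPtrArray *pending_notify; /* objects with frozen, queued notifications */
  } ToyCache;

  static ToyCache *
  toy_cache_new(void)
  {
      ToyCache *cache = g_new0(ToyCache, 1);

      cache->pending_notify = g_ptr_array_new();
      return cache;
  }

  /* Phase 2: apply a changed property to a cached object without emitting
   * anything yet. g_object_freeze_notify() queues the notify signal. */
  static void
  toy_cache_apply(ToyCache *cache, GObject *obj, const char *prop, const GValue *value)
  {
      g_object_freeze_notify(obj);
      g_object_set_property(obj, prop, value);
      g_ptr_array_add(cache->pending_notify, g_object_ref(obj));
  }

  /* Phase 3: all changes are applied and the cache is consistent; now release
   * the queued notifications. Each thaw matches one freeze from above, so the
   * signals fire here, in order, against a fully updated cache. */
  static void
  toy_cache_commit(ToyCache *cache)
  {
      guint i;

      for (i = 0; i < cache->pending_notify->len; i++) {
          GObject *obj = cache->pending_notify->pdata[i];

          g_object_thaw_notify(obj);
          g_object_unref(obj);
      }
      g_ptr_array_set_size(cache->pending_notify, 0);
  }
  ```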
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
  $ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
    on Fedora 31 generating wrong ELF notes. Usually libnm is smaller, but
    in these tests it had large (and bogus) ELF notes. Anyway, the point
    is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
    rm -rf /tmp/nm-test && \
    git checkout -B test 4fad8c7c642e && \
    git clean -fdx && \
    ./autogen.sh --prefix=/tmp/nm-test && \
    make -j 5 install && \
    make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
    NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
    for i in {1..10}; do
        NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
    done
    echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
    of profiles (551 profiles and 515 devices, mostly unrealized). This
    setup is already at the edge of what NetworkManager can currently
    handle. Of course, that is a different issue. Here we just check how
    long a plain `nmcli` invocation takes on the system.
```
do_cleanup() {
    for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
        nmcli connection delete uuid "$UUID"
    done
    for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
        nmcli device delete "$DEVICE"
    done
}
do_setup() {
    do_cleanup
    for i in {1..30}; do
        nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
        for j in $(seq $i 30); do
            nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
        done
    done
    systemctl restart NetworkManager.service
    sleep 5
}
do_test() {
    perf stat -r 50 -B nmcli 1>/dev/null
}
do_setup
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    priv->dbsid_nm_check_permissions =
        g_dbus_connection_signal_subscribe(priv->dbus_connection,
                                           priv->name_owner,
                                           NM_DBUS_INTERFACE,
                                           "CheckPermissions",
                                           NULL,
                                           NULL,
                                           G_DBUS_SIGNAL_FLAGS_NONE,
                                           _dbus_nm_check_permissions_cb,
                                           self,
                                           NULL);
libnm: fix leaking internal GMainContext for synchronously initialized NMClient
NMClient makes asynchronous D-Bus calls via g_dbus_connection_call().
This references the current GMainContext to later invoke the
asynchronous callback. Even when we cancel the asynchronous call,
the callback will still be invoked (later) to complete the request.
In particular this means that when we destroy (unref) an NMClient, there
are quite possibly still pending requests in the GMainContext. Although
they are cancelled, they keep the GMainContext alive.
With synchronous initialization, we have an internal GMainContext.
When we destroy the NMClient, we cannot just unhook the integrated
source; instead, we need to keep it integrated in the caller's main
context as long as there are pending requests.
Add a mechanism to track those pending requests and fix the leak for the
internal GMainContext. Also expose the same mechanism to the user via a new
API called nm_client_get_context_busy_watcher(). This allows the user
to know when it can stop iterating the main context and when all
resources are reclaimed.
For example, the following Python snippet will lead to a crash:
    for i in range(1, 2000):
        nmc = NM.Client.new(None)
This creates a number of NMClient instances and destroys them again.
Note that here the GMainContext is never iterated, because
synchronous initialization does not iterate the caller's context. So,
while we correctly unref and dispose the created NMClient instances,
there are pending requests left in the inner GMainContext. These pile
up and soon the program will crash because it runs out of file descriptors.
We can have a similar problem with asynchronous initialization, when
we create a new GMainContext per client, and don't iterate it after
we are done with the client.
Note that this patch does not avoid the problem in general. The problem
cannot be avoided: the user must iterate the main context at some point.
Otherwise, resources (memory and file descriptors) will be leaked.
Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/merge_requests/347
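A minimal sketch of the new API in use (illustrative only, not code from this
patch; it assumes the NMClient was created while `context` was the
thread-default main context):
```
#include <NetworkManager.h>

/* Drop the last reference to @client and keep iterating @context until all
 * of the client's pending requests have completed and its resources are
 * reclaimed. nm_client_get_context_busy_watcher() returns an object that is
 * destroyed exactly at that point; we track it with a weak pointer. */
static void
unref_client_and_drain(NMClient *client, GMainContext *context)
{
    GObject *watcher = nm_client_get_context_busy_watcher(client);

    g_object_add_weak_pointer(watcher, (gpointer *) &watcher);
    g_object_unref(client);

    while (watcher)
        g_main_context_iteration(context, TRUE);
}
```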
    g_dbus_connection_call(priv->dbus_connection,
                           priv->name_owner,
                           "/org/freedesktop",
                           DBUS_INTERFACE_OBJECT_MANAGER,
                           "GetManagedObjects",
                           NULL,
                           G_VARIANT_TYPE("(a{oa{sa{sv}}})"),
                           G_DBUS_CALL_FLAGS_NO_AUTO_START,
                           NM_DBUS_DEFAULT_TIMEOUT_MSEC,
                           priv->get_managed_objects_cancellable,
                           _dbus_get_managed_objects_cb,
                           nm_utils_user_data_pack(self, g_object_ref(priv->context_busy_watcher)));
    _dbus_check_permissions_start(self);
}

static void
_init_release_all(NMClient *self)
{
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check the RSS size
in the `ps aux` output.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
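For context on what the RSS above consists of: `nmcli monitor` essentially
keeps a long-lived NMClient around and reacts to its signals, so memory use
is dominated by the object cache. A hypothetical minimal equivalent (a
sketch, not the actual nmcli implementation) looks roughly like this:
```
/* Hypothetical sketch of what `nmcli monitor` keeps resident: a long-lived
 * NMClient whose cache mirrors the daemon's objects, plus signal handlers. */
#include <NetworkManager.h>

static void
device_added(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("device added: %s\n", nm_device_get_iface(device));
}

int
main(void)
{
    GMainLoop *loop   = g_main_loop_new(NULL, FALSE);
    NMClient  *client = nm_client_new(NULL, NULL);

    if (!client)
        return 1;

    g_signal_connect(client, NM_CLIENT_DEVICE_ADDED, G_CALLBACK(device_added), NULL);

    /* The client emits signals only while its GMainContext is iterated. */
    g_main_loop_run(loop);
    return 0;
}
```
Memory stays roughly flat over time because the cache is updated in place as
D-Bus signals arrive, rather than re-fetched.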
2019-10-30 11:42:58 +01:00
|
|
|
NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
|
2021-11-09 13:28:54 +01:00
|
|
|
CList **dbus_objects_lst_heads;
|
|
|
|
|
NMLDBusObject *dbobj;
|
2019-10-30 11:42:58 +01:00
|
|
|
int i;
|
2019-12-05 15:53:51 +01:00
|
|
|
gboolean permissions_state_changed = FALSE;
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
NML_NMCLIENT_LOG_D(self, "release all");
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
nm_clear_g_cancellable(&priv->permissions_cancellable);
|
|
|
|
|
nm_clear_g_cancellable(&priv->get_managed_objects_cancellable);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
/* Cleanup path: first release the D-Bus signal subscriptions. */
nm_clear_g_dbus_connection_signal(priv->dbus_connection, &priv->dbsid_nm_object_manager);
nm_clear_g_dbus_connection_signal(priv->dbus_connection,
                                  &priv->dbsid_dbus_properties_properties_changed);
nm_clear_g_dbus_connection_signal(priv->dbus_connection,
                                  &priv->dbsid_nm_settings_connection_updated);
nm_clear_g_dbus_connection_signal(priv->dbus_connection,
                                  &priv->dbsid_nm_connection_active_state_changed);
nm_clear_g_dbus_connection_signal(priv->dbus_connection,
                                  &priv->dbsid_nm_vpn_connection_state_changed);
nm_clear_g_dbus_connection_signal(priv->dbus_connection, &priv->dbsid_nm_check_permissions);

/* Reset the cached permissions state to its default and remember whether a
 * change notification needs to be emitted: */
if (priv->permissions_state != NM_TERNARY_DEFAULT) {
    priv->permissions_state = NM_TERNARY_DEFAULT;
    permissions_state_changed = TRUE;
}
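For context, a clear-signal helper like nm_clear_g_dbus_connection_signal()
presumably behaves like the following sketch (an assumption based on the call
sites above, not the actual libnm implementation): unsubscribe if an id is
set and zero it, so that repeated teardown is harmless.
```
#include <gio/gio.h>

/* Hypothetical helper mirroring the call sites above: unsubscribe a D-Bus
 * signal subscription if one is active and clear the stored id. */
static gboolean
clear_dbus_signal(GDBusConnection *dbus_connection, guint *subscription_id)
{
    if (*subscription_id == 0)
        return FALSE;
    g_dbus_connection_signal_unsubscribe(dbus_connection, *subscription_id);
    *subscription_id = 0;
    return TRUE;
}
```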
/* Drop any cached permissions and emit the corresponding change
 * notifications: */
if (priv->permissions) {
    gs_free guint8 *old_permissions = g_steal_pointer(&priv->permissions);

    _emit_permissions_changed(self, old_permissions, NULL);
}

if (permissions_state_changed)
    _notify(self, PROP_PERMISSIONS_STATE);
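This state backs libnm's public permissions API (nm_client_get_permission_result()
and the "permissions-state" property). A minimal consumer sketch, assuming a
successfully created NMClient and omitting error handling:
```
#include <NetworkManager.h>

/* Sketch: query a cached permission and watch for changes. */
static void
on_permission_changed(NMClient *client, guint permission, guint result, gpointer user_data)
{
    g_print("permission %u changed to %u\n", permission, result);
}

static void
watch_permissions(NMClient *client)
{
    NMClientPermissionResult res;

    res = nm_client_get_permission_result(client, NM_CLIENT_PERMISSION_NETWORK_CONTROL);
    g_print("network-control: %d\n", (int) res);

    g_signal_connect(client, "permission-changed", G_CALLBACK(on_permission_changed), NULL);
}
```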
/* Tear down the cached D-Bus objects; all collected changes must already
 * have been committed at this point: */
nm_assert(c_list_is_empty(&priv->obj_changed_lst_head));

dbus_objects_lst_heads = ((CList *[]) {
    &priv->dbus_objects_lst_head_on_dbus,
    &priv->dbus_objects_lst_head_with_nmobj_not_ready,
    &priv->dbus_objects_lst_head_with_nmobj_ready,
    NULL,
});

for (i = 0; dbus_objects_lst_heads[i]; i++) {
    c_list_for_each_entry (dbobj, dbus_objects_lst_heads[i], dbus_objects_lst) {
        NMLDBusObjIfaceData *db_iface_data;
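For readers unfamiliar with c-list, here is a self-contained sketch of the
iteration pattern used above (illustrative types and names; c-list.h comes
from the bundled c-util sources, so the include path may differ):
```
#include "c-list.h"

/* Intrusive list element: the CList node is embedded in the struct, and
 * c_list_for_each_entry() recovers the containing struct from it. */
typedef struct {
    int   value;
    CList lst;
} Item;

static int
sum_items(CList *head)
{
    Item *item;
    int   sum = 0;

    c_list_for_each_entry (item, head, lst)
        sum += item->value;
    return sum;
}
```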
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Hence these are not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me. (A small observer sketch after
this list shows how to watch these signals.)
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden by NMClient's
nm_client_get_connections() and by its "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
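The following is a minimal, compilable sketch of the three-step pattern
described above. All names (PendingChange, change_record(), change_apply_all(),
change_commit_all()) are illustrative and not libnm API; it only models the
idea of recording changes first, applying them with notifications frozen, and
emitting everything once the cache is consistent:
```
/* build: gcc -c sketch.c $(pkg-config --cflags glib-2.0 gobject-2.0) */
#include <glib-object.h>

typedef struct {
    GObject    *obj;      /* object whose property changed */
    const char *property; /* property name; must exist on obj */
    GValue      value;    /* new value unpacked from the D-Bus message */
} PendingChange;

/* Step 1: unpack the message and only record what changed. No signals yet. */
static void
change_record(GArray *pending, GObject *obj, const char *property, const GValue *value)
{
    PendingChange c = { .obj = g_object_ref(obj), .property = property };

    g_value_init(&c.value, G_VALUE_TYPE(value));
    g_value_copy(value, &c.value);
    g_array_append_val(pending, c);
}

/* Step 2: apply every change while notifications are frozen. The "notify"
 * signals are queued, not emitted. */
static void
change_apply_all(GArray *pending)
{
    for (guint i = 0; i < pending->len; i++) {
        PendingChange *c = &g_array_index(pending, PendingChange, i);

        g_object_freeze_notify(c->obj);
        g_object_set_property(c->obj, c->property, &c->value);
    }
}

/* Step 3: thaw; all queued "notify" signals now fire against a cache that
 * already contains every change. */
static void
change_commit_all(GArray *pending)
{
    for (guint i = 0; i < pending->len; i++) {
        PendingChange *c = &g_array_index(pending, PendingChange, i);

        g_object_thaw_notify(c->obj);
        g_value_unset(&c->value);
        g_object_unref(c->obj);
    }
    g_array_set_size(pending, 0);
}
```
The real code does not go through g_object_set_property() (libnm properties
are read-only and backed by internal state), but the freeze/apply/thaw shape
is the same.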
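To illustrate the compile-time check mentioned above: the exact definition of
NML_DBUS_META_PROPERTY_INIT_U() is not reproduced here, but the underlying C
trick can be sketched like this (PropertyMeta and PROPERTY_META_INIT_U() are
made-up names). The conditional operator requires compatible pointer types,
so the compiler diagnoses the initializer (a hard error with -Werror) unless
the named field really is a guint32, matching the D-Bus "u" type:
```
#include <glib.h>
#include <stddef.h>

typedef struct {
    const char *dbus_property;  /* property name on D-Bus */
    const char *dbus_signature; /* expected D-Bus type, e.g. "u" */
    gsize       offset;         /* where the value lives in the C struct */
} PropertyMeta;

/* The sizeof() expression is evaluated at compile time only and contributes
 * nothing to the offset. If 'field' is not a guint32, the conditional has
 * incompatible pointer types and the build breaks right here. */
#define PROPERTY_META_INIT_U(name, ctype, field)                          \
    {                                                                     \
        .dbus_property  = name,                                           \
        .dbus_signature = "u",                                            \
        .offset         = offsetof(ctype, field)                          \
                          + 0 * sizeof(*(1 ? &((ctype *) 0)->field        \
                                          : (guint32 *) 0)),              \
    }

typedef struct {
    guint32 version_id;
} ExampleState;

/* Compiles; change version_id to gint64 and it no longer does. */
static const PropertyMeta example_meta =
    PROPERTY_META_INIT_U("VersionId", ExampleState, version_id);
```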
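And here is a small usage sketch for watching the signal order. It uses real
libnm API (nm_client_new(), the "device-added"/"device-removed" signals and
"notify::devices"), but it is just an external observer, not part of this
patch:
```
/* build: gcc observe.c $(pkg-config --cflags --libs libnm) */
#include <NetworkManager.h>

static void
on_device_removed(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("device-removed: %s\n", nm_device_get_iface(device));
}

static void
on_notify_devices(GObject *client, GParamSpec *pspec, gpointer user_data)
{
    g_print("notify::devices\n");
}

static void
on_device_added(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("device-added: %s\n", nm_device_get_iface(device));
}

int
main(void)
{
    GError    *error  = NULL;
    NMClient  *client = nm_client_new(NULL, &error);
    GMainLoop *loop;

    if (!client) {
        g_printerr("could not create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    /* With the new ordering, replacing a device prints "device-removed",
     * then "notify::devices", then "device-added". */
    g_signal_connect(client, "device-removed", G_CALLBACK(on_device_removed), NULL);
    g_signal_connect(client, "notify::devices", G_CALLBACK(on_notify_devices), NULL);
    g_signal_connect(client, "device-added", G_CALLBACK(on_device_added), NULL);

    loop = g_main_loop_new(NULL, FALSE);
    g_main_loop_run(loop);
    return 0;
}
```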
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. (There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.)
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
        nm_assert(c_list_is_empty(&dbobj->obj_changed_lst));
        c_list_for_each_entry (db_iface_data, &dbobj->iface_lst_head, iface_lst)
            db_iface_data->iface_removed = TRUE;
        nml_dbus_object_obj_changed_link(self, dbobj, NML_DBUS_OBJ_CHANGED_TYPE_DBUS);
    }
}
    _dbus_handle_changes(self, "release-all", FALSE);
    /* We require that when we remove all D-Bus interfaces, all objects will go
     * away. Note that a NMLDBusObject can be alive due to a NMLDBusObjWatcher, but
     * even those should be all cleaned up. */
    nm_assert(c_list_is_empty(&priv->obj_changed_lst_head));
    nm_assert(c_list_is_empty(&priv->dbus_objects_lst_head_watched_only));
    nm_assert(c_list_is_empty(&priv->dbus_objects_lst_head_on_dbus));
    nm_assert(c_list_is_empty(&priv->dbus_objects_lst_head_with_nmobj_not_ready));
    nm_assert(c_list_is_empty(&priv->dbus_objects_lst_head_with_nmobj_ready));
    nm_assert(nm_g_hash_table_size(priv->dbus_objects) == 0);
}
/*****************************************************************************/
static void
name_owner_changed(NMClient *self, const char *name_owner)
{
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
    gboolean changed;
    gs_free char *old_name_owner_free = NULL;
    const char *old_name_owner;
    nm_auto_pop_gmaincontext GMainContext *dbus_context = NULL;
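
    /* The D-Bus NameOwnerChanged signal reports a lost owner as the empty
     * string; normalize that to NULL before comparing. */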
    name_owner = nm_str_not_empty(name_owner);
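
    /* nm_streq0() is a NULL-safe string equality check, so this also detects
     * the owner appearing or disappearing, not only a replaced owner. */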
changed = !nm_streq0(priv->name_owner, name_owner);
|
2019-10-30 11:42:58 +01:00
|
|
|
if (!name_owner && priv->main_context != priv->dbus_context) {
|
libnm: fix leaking internal GMainContext for synchronously initialized NMClient
NMClient makes asynchronous D-Bus calls via g_dbus_connection_call().
This references the current GMainContext to later invoke the
asynchronous callback. Even when we cancel the asynchronous call,
the callback will still be invoked (later) to complete the request.
In particular this means when we destroy (unref) an NMClient, there
are quite possibly pending requests in the GMainContext. Although they
are cancelled, they keep the GMainContext alive.
With synchronous initialization, we have an internal GMainContext.
When we destroy the NMClient, we cannot just unhook the integrated
source; instead, we need to keep it integrated in the caller's main
context as long as there are pending requests.
Add a mechanism to track those pending requests and fix the leak for the
internal GMainContext. Also expose the same mechanism to the user via a new
API called nm_client_get_context_busy_watcher(). This allows the user
to know when it can stop iterating the main context and when all
resources are reclaimed.
For example, the following (using the PyGObject bindings) will lead to a crash:
    from gi.repository import NM  # assumes gi.require_version("NM", "1.0") was called
    for i in range(1, 2000):
        nmc = NM.Client.new(None)
This creates a number of NMClient instances and destroys them again.
Note that here the GMainContext is never iterated, because
synchronous initialization does not iterate the caller's context. So,
while we correctly unref and dispose the created NMClient instances,
there are pending requests left in the inner GMainContext. These pile
up and soon the program will crash because it runs out of file descriptors.
We can have a similar problem with asynchronous initialization, when
we create a new GMainContext per client, and don't iterate it after
we are done with the client.
Note that this patch does not avoid the problem in general. The problem
cannot be avoided: the user must iterate the main context at some point.
Otherwise resources (memory and file descriptors) will be leaked.
Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/merge_requests/347
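A minimal sketch of the busy-watcher pattern described above (an illustration, not code from this commit; it assumes a synchronously initialized client that is used from the thread owning the thread-default main context):
```
#include <NetworkManager.h>

/* Dispose the client, then keep iterating the context until all
 * pending requests have completed and released their resources. */
static void
dispose_client_and_drain(NMClient *client)
{
    GObject      *watcher = nm_client_get_context_busy_watcher(client);
    GMainContext *context = g_main_context_ref_thread_default();

    /* Only a weak pointer; pending requests keep the watcher alive. */
    g_object_add_weak_pointer(watcher, (gpointer *) &watcher);

    g_object_unref(client);

    while (watcher)
        g_main_context_iteration(context, TRUE);

    g_main_context_unref(context);
}
```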
2019-11-26 00:23:41 +01:00
|
|
|
gs_unref_object GObject *old_context_busy_watcher = NULL;
|
2019-10-30 11:42:58 +01:00
|
|
|
NML_NMCLIENT_LOG_D(self, "resync main context as we have no name owner");
|
2019-10-30 11:42:58 +01:00
|
|
|
nm_clear_g_dbus_connection_signal(priv->dbus_connection, &priv->name_owner_changed_id);
|
2016-10-18 16:35:07 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectManagerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with a large number of D-Bus
objects. The patch tries hard to make the implementation scale well. Of
course, when we cache N objects that have references to each other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to an NMClient is through the caller's
GMainContext. This follows GLib's style and effectively allows using NMClient
in a multi-threaded scenario. This implies sticking to a main context
upon construction and ensuring that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context (see the sketch after this list).
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receives a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should all be for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also, previously we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
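A minimal sketch of the dispatch rule from the GMainContext item above (schedule_on_context() and its parameters are illustrative placeholders, not code from this commit): instead of g_idle_add(), which always targets g_main_context_default(), create the source explicitly and attach it to the context captured at construction.
```
#include <glib.h>

/* Run @func once on @ctx instead of on the default main context. */
static void
schedule_on_context(GMainContext *ctx, GSourceFunc func, gpointer user_data)
{
    GSource *source = g_idle_source_new();

    g_source_set_callback(source, func, user_data, NULL);
    g_source_attach(source, ctx);
    g_source_unref(source);
}
```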
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
/* Our instance was initialized synchronously. Usually we must henceforth
 * stick to an internal main context. But now we have no name-owner...
 * at this point, we anyway are going to do a full resync. Swap the main
 * contexts again. */
2016-10-18 16:35:07 +02:00
libnm: fix leaking internal GMainContext for synchronously initialized NMClient
NMClient makes asynchronous D-Bus calls via g_dbus_connection_call().
This references the current GMainContext to later invoke the
asynchronous callback. Even when we cancel the asynchronous call,
the callback will still be invoked (later) to complete the request.
In particular, this means that when we destroy (unref) an NMClient, there
are quite possibly pending requests in the GMainContext. Although they
are cancelled, they keep the GMainContext alive.
With synchronous initialization, we have an internal GMainContext.
When we destroy the NMClient, we cannot simply unhook the integrated
source; instead, we need to keep it integrated in the caller's main
context as long as there are pending requests.
Add a mechanism to track those pending requests and fix the leak for the
internal GMainContext. Also expose the same mechanism to the user via a new
API called nm_client_get_context_busy_watcher(). This allows the user
to know when it can stop iterating the main context and when all
resources are reclaimed.
For example, the following will lead to a crash:
    for i in range(1, 2000):
        nmc = NM.Client.new(None)
This creates a number of NMClient instances and destroys them again.
Note that here the GMainContext is never iterated, because
synchronous initialization does not iterate the caller's context. So,
while we correctly unref and dispose the created NMClient instances,
there are pending requests left in the inner GMainContext. These pile
up and soon the program will crash because it runs out of file descriptors.
We can have a similar problem with asynchronous initialization, when
we create a new GMainContext per client, and don't iterate it after
we are done with the client.
Note that this patch does not avoid the problem in general. The problem
cannot be avoided; the user must iterate the main context at some point.
Otherwise, resources (memory and file descriptors) will be leaked.
Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/merge_requests/347
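A sketch of how a caller might use the new API (assumptions: a synchronously initialized NMClient, the companion accessor nm_client_get_main_context(), and a weak pointer as one way to watch for finalization):
```
#include <NetworkManager.h>

/* After dropping the last reference to the client, keep iterating its
 * main context until the context-busy-watcher is finalized, i.e. until
 * all pending D-Bus requests have completed and their resources were
 * reclaimed. */
void
destroy_client_and_drain(NMClient *client)
{
    GMainContext *context = g_main_context_ref(nm_client_get_main_context(client));
    gpointer      watcher = nm_client_get_context_busy_watcher(client);

    /* when the watcher is finalized, @watcher is reset to NULL */
    g_object_add_weak_pointer(G_OBJECT(watcher), &watcher);

    g_object_unref(client);

    while (watcher)
        g_main_context_iteration(context, TRUE);

    g_main_context_unref(context);
}
```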
2019-11-26 00:23:41 +01:00
/* The successor busy watcher is attached as qdata to the old one;
 * take a reference to it and make it the current watcher. */
old_context_busy_watcher = g_steal_pointer(&priv->context_busy_watcher);
priv->context_busy_watcher = g_object_ref(
    g_object_get_qdata(old_context_busy_watcher, nm_context_busy_watcher_quark()));
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
/* Make the caller's main context the D-Bus context again. */
g_main_context_ref(priv->main_context);
g_main_context_unref(priv->dbus_context);
priv->dbus_context = priv->main_context;
2016-10-18 16:35:07 +02:00
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
dbus_context = nm_g_main_context_push_thread_default_if_necessary(priv->dbus_context);
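For context, this is the underlying GLib pattern such a helper presumably wraps (a sketch; the real nm_g_main_context_push_thread_default_if_necessary() is an internal utility and may differ in details):
```
#include <glib.h>

/* Push @context as the thread-default main context unless it already
 * is; return the context that needs a matching
 * g_main_context_pop_thread_default() later, or NULL if nothing was
 * pushed. */
GMainContext *
push_thread_default_if_necessary(GMainContext *context)
{
    if (g_main_context_get_thread_default() == context)
        return NULL;
    g_main_context_push_thread_default(context);
    return context;
}
```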
2016-10-25 11:11:12 +02:00
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
/* we need to sync again... */
2016-10-25 11:11:12 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps
(see the toy sketch after this list). First NMClient unpacks the message
(e.g. _dbus_handle_properties_changed()) and keeps track of the changed data.
Then we update the GObject instances (_dbus_handle_obj_changed_dbus()) without
emitting any signals yet. Finally, we emit all signals and notifications that
were collected (_dbus_handle_changes_commit()). Note that, for example, during
the initial GetManagedObjects() reply, NMClient receives a large amount of
state at once. But we first apply all the changes to our GObject instances
before emitting any signals. The result is that signals are always emitted at
a moment when the cache is consistent. The unavoidable downside is that when
you receive a property-changed signal, possibly many other properties have
already changed and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from the client
side during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what the NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should all be for the better, but
the goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping the NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also, previously we constructed NMPropertiesInfo
and had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying it around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them, because otherwise it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Hence these are not ordinary assertions indicating a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and from the "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
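The following is a minimal sketch, not part of this patch, of the GMainContext
rule above: one NMClient per thread, each with a private context pushed as the
thread default for every access (the helper name worker() is illustrative):
```
#include <NetworkManager.h>

/* Hedged example: create, use and destroy an NMClient entirely within a
 * worker thread's own GMainContext. */
static gpointer
worker(gpointer user_data)
{
    GMainContext *context = g_main_context_new();
    NMClient     *client;
    GError       *error = NULL;

    g_main_context_push_thread_default(context);

    client = nm_client_new(NULL, &error); /* synchronous initialization */
    if (!client) {
        g_warning("could not create NMClient: %s", error->message);
        g_clear_error(&error);
    } else {
        g_message("NetworkManager running: %d",
                  nm_client_get_nm_running(client));
        g_object_unref(client);
        /* pending (cancelled) requests may still reference the context;
         * drain it before dropping it (see the context-busy-watcher
         * commit further below for the complete pattern). */
        while (g_main_context_iteration(context, FALSE))
            ;
    }

    g_main_context_pop_thread_default(context);
    g_main_context_unref(context);
    return NULL;
}
```
And a toy sketch, with assumed names and not the actual implementation, of the
unpack/apply/commit pattern: defer per-object notifications while updating the
cache, and emit them only after everything is applied:
```
/* Hedged illustration of "apply first, emit later" using GObject's
 * freeze/thaw notification mechanism. */
static void
obj_changed(GPtrArray *pending, GObject *obj)
{
    /* steps 1+2: update internal state without emitting */
    g_object_freeze_notify(obj);
    /* ... apply the changed properties to @obj here ... */
    g_ptr_array_add(pending, g_object_ref(obj));
}

static void
changes_commit(GPtrArray *pending)
{
    guint i;

    /* step 3: the cache is consistent; emit all queued notifications */
    for (i = 0; i < pending->len; i++) {
        GObject *obj = pending->pdata[i];

        g_object_thaw_notify(obj);
        g_object_unref(obj);
    }
    g_ptr_array_set_size(pending, 0);
}
```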
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. (Anyway, the point
is to show the relative sizes, so it doesn't matter.)
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
/* D-Bus events must be handled with the client's D-Bus context as the
 * thread-default main context. */
_assert_main_context_is_current_thread_default(self, dbus_context);
/* Subscribe to NameOwnerChanged for the NetworkManager service, so the
 * client notices when the daemon stops, restarts, or changes its name
 * owner. */
priv->name_owner_changed_id =
    nm_dbus_connection_signal_subscribe_name_owner_changed(priv->dbus_connection,
                                                           NM_DBUS_SERVICE,
                                                           name_owner_changed_cb,
                                                           self,
                                                           NULL);
libnm: fix leaking internal GMainContext for synchronously initialized NMClient
NMClient makes asynchronous D-Bus calls via g_dbus_connection_call().
This references the current GMainContext to later invoke the
asynchronous callback. Even when we cancel the asynchronous call,
the callback will still be invoked (later) to complete the request.
In particular this means when we destroy (unref) an NMClient, there
are quite possibly pending requests in the GMainContext. Although they
are cancelled, they keep the GMainContext alive.
With synchronous initialization, we have an internal GMainContext.
When we destroy the NMClient, we cannot just unhook the integrated
source, instead, we need to keep it integrated in the caller's main
context, as long as there are pending requests.
Add a mechanism to track those pending requests and fix the leak for the
internal GMainContext. Also expose the same mechanism to the user via a new
API called nm_client_get_context_busy_watcher(). This allows the user
to know when it can stop iterating the main context and when all
resources are reclaimed.
For example, the following will lead to a crash:

    for i in range(1, 2000):
        nmc = NM.Client.new(None)
This creates a number of NMClient instances and destroys them again.
Note that here the GMainContext is never iterated, because
synchronous initialization does not iterate the caller's context. So,
while we correctly unref and dispose the created NMClient instances,
there are pending requests left in the inner GMainContext. These pile
up and soon the program will crash because it runs out of file descriptors.
We can have a similar problem with asynchronous initialization, when
we create a new GMainContext per client, and don't iterate it after
we are done with the client.
Note that this patch does not avoid the problem in general. The problem
cannot be avoided: the user must iterate the main context at some point.
Otherwise resources (memory and file descriptors) will be leaked.
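As a minimal sketch, assuming the nm_client_get_main_context() and
nm_client_get_context_busy_watcher() getters (the helper names watcher_gone()
and unref_and_drain() are illustrative), a caller could drain the context
before giving up the last reference like this:
```
/* Hedged sketch: iterate the client's main context until the context
 * busy watcher is destroyed, i.e. until all pending requests of the
 * already-unreffed NMClient completed and its resources are reclaimed. */
static void
watcher_gone(gpointer user_data, GObject *where_the_object_was)
{
    *((gboolean *) user_data) = TRUE;
}

static void
unref_and_drain(NMClient *client)
{
    GMainContext *context;
    gboolean      done = FALSE;

    context = g_main_context_ref(nm_client_get_main_context(client));
    g_object_weak_ref(nm_client_get_context_busy_watcher(client),
                      watcher_gone,
                      &done);
    g_object_unref(client);
    while (!done)
        g_main_context_iteration(context, TRUE);
    g_main_context_unref(context);
}
```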
Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/merge_requests/347
/* query the current owner of the NetworkManager D-Bus name */
name_owner_get_call(self);
} else
    /* make the client's D-Bus context the thread-default context, unless
     * it already is */
    dbus_context = nm_g_main_context_push_thread_default_if_necessary(priv->dbus_context);
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
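If only the peak memory matters, the verbose output can be reduced to the
relevant line; a minimal sketch (`/bin/time -v` reports on stderr, so stderr
is piped into grep while nmcli's own output is discarded):
```
PAGER= /bin/time -v nmcli 2>&1 >/dev/null | grep 'Maximum resident set size'
```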
---
N6) Same setup as N4), but run `nmcli monitor` and check the RSS column
    of `ps aux`.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
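To sample the RSS without eyeballing the full `ps aux` table, the monitor
process can be queried directly; a minimal sketch, assuming no other `nmcli`
process is running:
```
nmcli monitor >/dev/null &
sleep 2
ps -o pid=,rss= -C nmcli   # RSS in KiB
kill $!
```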
/* nm-client.c (excerpt): handling a change of the D-Bus name owner. */
if (changed) {
    /* NM_PRINT_FMT_QUOTE_STRING() expands one string into the three
     * arguments consumed by a "%s%s%s" format, quoting the value or
     * printing "(null)" when it is unset. */
    NML_NMCLIENT_LOG_D(self,
                       "name owner changed: %s%s%s -> %s%s%s",
                       NM_PRINT_FMT_QUOTE_STRING(priv->name_owner),
                       NM_PRINT_FMT_QUOTE_STRING(name_owner));
    /* Keep the previous owner string alive until the handler is done
     * with it. */
    old_name_owner_free = priv->name_owner;
    priv->name_owner    = g_strdup(name_owner);
    old_name_owner      = old_name_owner_free;
} else
    old_name_owner = priv->name_owner;

/* Notify about the changed "dbus-name-owner" property. */
if (changed)
    _notify(self, PROP_DBUS_NAME_OWNER);

/* The previous name owner (if any) is gone: release all state that was
 * fetched from it. */
if (changed && old_name_owner)
    _init_release_all(self);

/* A new name owner appeared: fetch all objects from the service anew. */
if (changed && priv->name_owner)
    _init_fetch_all(self);

_set_nm_running(self, FALSE);
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
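    /* Initialization is still in flight; check whether it can complete now. */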
    if (priv->init_data) {
        nm_auto_pop_gmaincontext GMainContext *main_context = NULL;

        if (priv->main_context != priv->dbus_context)
            main_context = nm_g_main_context_push_thread_default_if_necessary(priv->main_context);

        _init_start_check_complete(self);
    }
}

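/* GDBusSignalCallback registered via g_dbus_connection_signal_subscribe() for
 * the D-Bus "NameOwnerChanged" signal; it lets NMClient notice when the
 * NetworkManager service appears on or vanishes from the bus. */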
static void
name_owner_changed_cb(GDBusConnection *connection,
                      const char      *sender_name,
                      const char      *object_path,
                      const char      *interface_name,
                      const char      *signal_name,
                      GVariant        *parameters,
                      gpointer         user_data)
{
    NMClient *self = user_data;
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and from the "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved (see the
third sketch after this list).
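First sketch: a minimal consumer-side illustration of the batching described
above, assuming an already-initialized NMClient *client (the handler name and
the g_print() output are made up). By the time any "notify" signal fires, the
whole cache already reflects the batched update, so reading other cached state
from the handler observes a consistent snapshot.
```
#include <NetworkManager.h>

/* Called for "notify::devices" on NMClient. Changes were applied to all
 * GObject instances before this signal was emitted, so other cached
 * state read here is consistent with the device list, even if further
 * notifications from the same batch are still queued. */
static void
on_devices_changed(GObject *object, GParamSpec *pspec, gpointer user_data)
{
    NMClient        *client  = NM_CLIENT(object);
    const GPtrArray *devices = nm_client_get_devices(client);

    g_print("device list changed, now %u devices\n", devices->len);
}
```
Hooking it up would be the usual g_signal_connect(client, "notify::devices",
G_CALLBACK(on_devices_changed), NULL).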
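Second sketch: a toy version of the compile-time check idea. The names
ExampleMetaProperty, ExampleState and EXAMPLE_META_PROPERTY_INIT_U() are made
up; the real NML_DBUS_META_PROPERTY_INIT_U() is more elaborate. The point is
that the "u" initializer only compiles when the backing C field has the size
a D-Bus "u" requires.
```
#include <glib.h>

typedef struct {
    const char *dbus_type;
    gsize       offset;
} ExampleMetaProperty;

typedef struct {
    guint32 metric; /* backed by D-Bus "u" */
    char   *iface;  /* backed by D-Bus "s" */
} ExampleState;

/* The "+ 0 * sizeof(char[...])" term contributes nothing to the offset,
 * but declares a char array of size -1 (a compile error) whenever the
 * field does not have the size of a guint32. */
#define EXAMPLE_META_PROPERTY_INIT_U(struct_type, field)                  \
    {                                                                     \
        .dbus_type = "u",                                                 \
        .offset    = G_STRUCT_OFFSET(struct_type, field)                  \
                     + 0 * sizeof(char[sizeof(((struct_type *) 0)->field) \
                                               == sizeof(guint32)         \
                                           ? 1                            \
                                           : -1]),                        \
    }

static const ExampleMetaProperty example_props[] = {
    EXAMPLE_META_PROPERTY_INIT_U(ExampleState, metric),
    /* EXAMPLE_META_PROPERTY_INIT_U(ExampleState, iface)
     * would fail to compile. */
};
```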
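Third sketch: the asymmetry around hidden profiles, assuming an initialized
NMClient *client (the function name is made up). A profile hidden via
"connection.permissions" is absent from nm_client_get_connections(), yet
nm_active_connection_get_connection() can still hand out its
NMRemoteConnection.
```
#include <NetworkManager.h>

static void
print_active_but_hidden(NMClient *client)
{
    const GPtrArray *acs  = nm_client_get_active_connections(client);
    const GPtrArray *cons = nm_client_get_connections(client);
    guint            i, j;

    for (i = 0; i < acs->len; i++) {
        NMActiveConnection *ac     = acs->pdata[i];
        NMRemoteConnection *rc     = nm_active_connection_get_connection(ac);
        gboolean            listed = FALSE;

        for (j = 0; rc && j < cons->len; j++) {
            if (cons->pdata[j] == (gpointer) rc)
                listed = TRUE;
        }
        if (rc && !listed)
            g_print("'%s' is active, but its profile is hidden\n",
                    nm_active_connection_get_id(ac));
    }
}
```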
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    NMClientPrivate *priv;
    const char *new_owner;
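
    /* The strict D-Bus typing noted in the CHANGES list above shows up
     * here: the NameOwnerChanged parameters are verified to be "(sss)"
     * before they are unpacked. */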
    if (!g_variant_is_of_type(parameters, G_VARIANT_TYPE("(sss)")))
        return;

    priv = NM_CLIENT_GET_PRIVATE(self);

    if (priv->name_owner_get_cancellable)
        return;

    g_variant_get(parameters, "(&s&s&s)", NULL, NULL, &new_owner);

    name_owner_changed(self, new_owner);
}

libnm: fix leaking internal GMainContext for synchronously initialized NMClient
NMClient makes asynchronous D-Bus calls via g_dbus_connection_call().
This references the current GMainContext to later invoke the
asynchronous callback. Even when we cancel the asynchronous call,
the callback will still be invoked (later) to complete the request.
In particular this means when we destroy (unref) an NMClient, there
are quite possibly pending requests in the GMainContext. Although they
are cancelled, they keep the GMainContext alive.
With synchronous initialization, we have an internal GMainContext.
When we destroy the NMClient, we cannot just unhook the integrated
source, instead, we need to keep it integrated in the caller's main
context, as long as there are pending requests.
Add a mechanism to track those pending requests and fix the leak for the
internal GMainContext. Also expose the same mechanism to the user via a new
API called nm_client_get_context_busy_watcher(). This allows the user
to know when it can stop iterating the main context and when all
resources are reclaimed.
For example the following will lead to a crash:
    for i in range(1,2000):
        nmc = NM.Client.new(None)
This creates a number of NMClient instances and destroys them again.
Note that here the GMainContext is never iterated, because
synchronous initialization does not iterate the caller's context. So,
while we correctly unref and dispose the created NMClient instances,
there are pending requests left in the inner GMainContext. These pile
up and soon the program will crash because it runs out of file descriptors.
We can have a similar problem with asynchronous initialization, when
we create a new GMainContext per client, and don't iterate it after
we are done with the client.
Note that this patch does not avoid the problem in general. The problem
cannot be avoided: the user must iterate the main context at some point.
Otherwise, resources (memory and file descriptors) will be leaked.
Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/merge_requests/347
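A minimal sketch of using the new API, assuming the NMClient was created with
the default GMainContext as its context (drain_client() is a made-up name):
take a weak pointer to the busy watcher, drop the client, and iterate until
the watcher is finalized, i.e. until all pending D-Bus callbacks completed.
```
#include <NetworkManager.h>

static void
drain_client(NMClient *client)
{
    /* The watcher object lives exactly as long as pending requests keep
     * the context busy; track its lifetime with a weak pointer. */
    gpointer weak = nm_client_get_context_busy_watcher(client);

    g_object_add_weak_pointer(G_OBJECT(weak), &weak);
    g_object_unref(client);

    /* Iterate the context the client was initialized with (here assumed
     * to be the default context) until the watcher is gone. */
    while (weak)
        g_main_context_iteration(NULL, TRUE);
}
```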
static void
name_owner_get_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    NMClient *self;
    NMClientPrivate *priv;
    gs_unref_object GObject *context_busy_watcher = NULL;
    gs_unref_variant GVariant *ret = NULL;
    gs_free_error GError *error = NULL;
    const char *name_owner = NULL;
2019-10-30 11:42:58 +01:00
|
|
|
|
libnm: fix leaking internal GMainContext for synchronously initialized NMClient
NMClient makes asynchronous D-Bus calls via g_dbus_connection_call().
This references the current GMainContext to later invoke the
asynchronous callback. Even when we cancel the asynchronous call,
the callback will still be invoked (later) to complete the request.
In particular this means when we destroy (unref) an NMClient, there
are quite possibly pending requests in the GMainContext. Although they
are cancelled, they keep the GMainContext alive.
With synchronous initialization, we have an internal GMainContext.
When we destroy the NMClient, we cannot just unhook the integrated
source, instead, we need to keep it integrated in the caller's main
context, as long as there are pending requests.
Add a mechanism to track those pending requests and fix the leak for the
internal GMainContext. Also expose the same mechanism to the user via a new
API called nm_client_get_context_busy_watcher(). This allows the user
to know when it can stop iterating the main context and when all
resources are reclaimed.
For example the following will lead to a crash:
for i in range(1,2000):
nmc = NM.Client.new(None)
This creates a number of NMClient instances and destroys them again.
Note that here the GMainContext is never iterated, because
synchronous initialization does not iterate the caller's context. So,
while we correctly unref and dispose the created NMClient instances,
there are pending requests left in the inner GMainContext. These pile
up and soon the program will crash because it runs out of file descriptors.
We can have a similar problem with asynchronous initialization, when
we create a new GMainContext per client, and don't iterate it after
we are done with the client.
Note that this patch does not avoid the problem in general. The problem
cannot be avoided; the user must iterate the main context at some point.
Otherwise resources (memory and file descriptors) will be leaked.
Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/merge_requests/347
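A minimal usage sketch of the new mechanism, assuming the busy watcher's
pending requests dispatch on the context returned by
nm_client_get_main_context(); this is an illustration, not code from the
patch:
```
#include <NetworkManager.h>

static void
watcher_gone(gpointer data, GObject *where_the_object_was)
{
    *((gboolean *) data) = TRUE;
}

static void
release_client_and_drain(NMClient *client)
{
    GMainContext *context;
    gboolean      gone = FALSE;

    /* hold our own reference; the client's reference goes away below */
    context = g_main_context_ref(nm_client_get_main_context(client));

    g_object_weak_ref(nm_client_get_context_busy_watcher(client),
                      watcher_gone, &gone);
    g_object_unref(client);

    /* iterate until the busy watcher is finalized, i.e. until all
     * pending requests completed and their resources were released */
    while (!gone)
        g_main_context_iteration(context, TRUE);

    g_main_context_unref(context);
}
```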
2019-11-26 00:23:41 +01:00
|
|
|
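/* unpack the NMClient and the context-busy-watcher that the caller
 * packed into user_data (presumably via nm_utils_user_data_pack())
 * when issuing the request */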
nm_utils_user_data_unpack(user_data, &self, &context_busy_watcher);
|
|
|
|
|
|
|
|
|
|
ret = g_dbus_connection_call_finish(G_DBUS_CONNECTION(source), result, &error);
|
|
|
|
|
|
2020-01-23 10:36:24 +01:00
|
|
|
if (!ret && nm_utils_error_is_cancelled(error))
|
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
|
|
|
return;
|
2014-07-24 08:53:33 -04:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
|
|
|
priv = NM_CLIENT_GET_PRIVATE(self);
|
2017-06-21 15:20:19 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
|
|
|
g_clear_object(&priv->name_owner_get_cancellable);
|
2014-09-29 10:58:16 -04:00
|
|
|
|
libnm: fix leaking internal GMainContext for synchronously initialized NMClient
2019-11-26 00:23:41 +01:00
|
|
|
if (ret)
|
|
|
|
|
g_variant_get(ret, "(&s)", &name_owner);
|
|
|
|
|
|
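Taken together, the code lines blamed in this stretch follow the usual
GDBusConnection completion-callback shape. A reconstructed sketch, for
illustration only (the function name, declarations and cleanup style
are assumptions, not the verbatim nm-client.c code):
```
static void
name_owner_get_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    NMClient            *self;
    NMClientPrivate     *priv;
    gpointer             context_busy_watcher;
    g_autoptr(GVariant)  ret        = NULL;
    g_autoptr(GError)    error      = NULL;
    const char          *name_owner = NULL;

    nm_utils_user_data_unpack(user_data, &self, &context_busy_watcher);

    ret = g_dbus_connection_call_finish(G_DBUS_CONNECTION(source), result, &error);
    if (!ret && nm_utils_error_is_cancelled(error))
        return; /* cancelled: "self" may already be gone, do not touch it */

    priv = NM_CLIENT_GET_PRIVATE(self);
    g_clear_object(&priv->name_owner_get_cancellable);

    if (ret)
        g_variant_get(ret, "(&s)", &name_owner); /* "&s" borrows from "ret" */

    /* ... continue with name_owner (NULL on failure) ... */
}
```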
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    name_owner_changed(self, name_owner);
}
libnm: fix leaking internal GMainContext for synchronously initialized NMClient
NMClient makes asynchronous D-Bus calls via g_dbus_connection_call().
This references the current GMainContext to later invoke the
asynchronous callback. Even when we cancel the asynchronous call,
the callback will still be invoked (later) to complete the request.
In particular this means when we destroy (unref) an NMClient, there
are quite possibly pending requests in the GMainContext. Although they
are cancelled, they keep the GMainContext alive.
With synchronous initialization, we have an internal GMainContext.
When we destroy the NMClient, we cannot just unhook the integrated
source; instead, we need to keep it integrated in the caller's main
context as long as there are pending requests.
Add a mechanism to track those pending requests and fix the leak for the
internal GMainContext. Also expose the same mechanism to the user via a new
API called nm_client_get_context_busy_watcher(). This allows the user
to know when it can stop iterating the main context and when all
resources are reclaimed.
For example, the following will lead to a crash:
for i in range(1,2000):
nmc = NM.Client.new(None)
This creates a number of NMClient instances and destroys them again.
Note that here the GMainContext is never iterated, because
synchronous initialization does not iterate the caller's context. So,
while we correctly unref and dispose the created NMClient instances,
there are pending requests left in the inner GMainContext. These pile
up and soon the program will crash because it runs out of file descriptors.
We can have a similar problem with asynchronous initialization, when
we create a new GMainContext per client, and don't iterate it after
we are done with the client.
Note that this patch does not avoid the problem in general. The problem
cannot be avoided: the user must iterate the main context at some point,
otherwise resources (memory and file descriptors) are leaked.
Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/merge_requests/347
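In practice, the teardown pattern enabled by nm_client_get_context_busy_watcher()
can be sketched as follows. This is a minimal example, not code from this patch;
it also assumes nm_client_get_main_context(), the accessor for the client's main
context:
```
#include <NetworkManager.h>

static void
_watcher_destroyed(gpointer data, GObject *where_the_object_was)
{
    *((gboolean *) data) = TRUE;
}

int
main(void)
{
    NMClient     *nmc;
    GMainContext *context;
    gboolean      watcher_gone = FALSE;

    nmc = nm_client_new(NULL, NULL);
    if (!nmc)
        return 1;

    context = g_main_context_ref(nm_client_get_main_context(nmc));

    /* Get notified when the last reference to the busy watcher is dropped. */
    g_object_weak_ref(nm_client_get_context_busy_watcher(nmc),
                      _watcher_destroyed,
                      &watcher_gone);

    g_object_unref(nmc);

    /* Keep iterating the main context until all pending requests have
     * completed and their resources are reclaimed. */
    while (!watcher_gone)
        g_main_context_iteration(context, TRUE);

    g_main_context_unref(context);
    return 0;
}
```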
static void
name_owner_get_call(NMClient *self)
{
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);

    nm_assert(!priv->name_owner_get_cancellable);
    priv->name_owner_get_cancellable = g_cancellable_new();

    g_dbus_connection_call(priv->dbus_connection,
                           DBUS_SERVICE_DBUS,
                           DBUS_PATH_DBUS,
                           DBUS_INTERFACE_DBUS,
                           "GetNameOwner",
                           g_variant_new("(s)", NM_DBUS_SERVICE),
                           G_VARIANT_TYPE("(s)"),
                           G_DBUS_CALL_FLAGS_NONE,
                           NM_DBUS_DEFAULT_TIMEOUT_MSEC,
                           priv->name_owner_get_cancellable,
                           name_owner_get_cb,
                           nm_utils_user_data_pack(self, g_object_ref(priv->context_busy_watcher)));
}
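/* A hypothetical sketch (not the actual libnm code) of the receiving side:
 * name_owner_get_cb() would unpack the (self, context_busy_watcher) tuple
 * packed above; the gs_unref_object cleanup then drops the extra reference
 * that kept the context busy while the call was in flight. */
static void
name_owner_get_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    NMClient                  *self;
    gs_unref_object GObject   *context_busy_watcher = NULL;
    gs_unref_variant GVariant *ret                  = NULL;
    gs_free_error GError      *error                = NULL;

    nm_utils_user_data_unpack(user_data, &self, &context_busy_watcher);

    ret = g_dbus_connection_call_finish(G_DBUS_CONNECTION(source), result, &error);
    if (nm_utils_error_is_cancelled(error))
        return;

    /* ... handle the "(s)" name-owner reply (or the error) for @self ... */
}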
/*****************************************************************************/

static gboolean
_nml_cleanup_context_busy_watcher_on_idle_cb(gpointer user_data)
{
    nm_auto_unref_gmaincontext GMainContext *context              = NULL;
    gs_unref_object GObject                 *context_busy_watcher = NULL;

    nm_utils_user_data_unpack(user_data, &context, &context_busy_watcher);

    nm_assert(context);
    nm_assert(G_IS_OBJECT(context_busy_watcher));

    return G_SOURCE_REMOVE;
}

void
nml_cleanup_context_busy_watcher_on_idle(GObject *context_busy_watcher_take, GMainContext *context)
{
    gs_unref_object GObject *context_busy_watcher = g_steal_pointer(&context_busy_watcher_take);
    GSource                 *cleanup_source;

    nm_assert(G_IS_OBJECT(context_busy_watcher));
    nm_assert(context);

    /* Technically, we cancelled all pending actions (and these actions
     * (GTask) keep the context_busy_watcher object alive). Also, we passed
     * no destroy notify to g_dbus_connection_signal_subscribe().
     * That means, there should be no other unaccounted GSource'es left.
     *
     * Another reason is g_dbus_connection_signal_unsubscribe(), which does
     * not guarantee that everything was unsubscribed right away. Instead,
     * there might still be idle actions queued (which will be thrown away
     * once processed). Still, that means the caller must continue to iterate
     * the main context afterwards. Maybe we should pass a GDestroyNotify to
     * g_dbus_connection_signal_subscribe() which handles an additional reference
     * to the context_busy_watcher object. That would be the proper way to handle
     * this. We don't do that, so in practice at this point there might still
     * be some idle actions (with G_PRIORITY_DEFAULT) pending. As we schedule here
     * another idle action with lower priority, we handle those cases without
     * using a GDestroyNotify.
     *
     * However, we really need to be sure that the context_busy_watcher's
     * lifetime matches the time that the context is busy. That is especially
     * important with synchronous initialization, where the context-busy-watcher
     * keeps the inner GMainContext integrated in the caller's.
     * We must not g_source_destroy() that integration too early.
     *
     * So, to be really sure all of this holds, always schedule one last
     * cleanup idle action with low priority. This should be the last
     * thing related to this instance that keeps the context busy.
     *
     * Note that we could also *not* take a reference on @context
     * and unref @context_busy_watcher via the GDestroyNotify. That would
     * allow the context to be wrapped up early, and when the last user
     * gives up the reference to the context, the destroy notify could complete
     * without even invoking the idle handler. However, that destroy notify may
     * not be called in the right thread. So, we want to be sure that we unref
     * the context-busy-watcher in the right context. Hence, we always take an
     * additional reference and always clean up in the idle handler. This means
     * the user *MUST* always keep iterating the context after NMClient got destroyed.
     * But that is not a severe limitation, because the user must be prepared
     * to do that anyway. In many cases it is necessary anyway (and the user
     * wouldn't know a priori when not), so this way it is just always necessary.
     *
     * It might be nice to use G_PRIORITY_DEFAULT here to minimize how long the
     * instance stays alive. Probably that would be fine, even without passing
     * a GDestroyNotify to g_dbus_connection_signal_subscribe(). But to be extra
     * careful, use a low priority. */

    cleanup_source =
        nm_g_idle_source_new(G_PRIORITY_LOW + 10,
                             _nml_cleanup_context_busy_watcher_on_idle_cb,
                             nm_utils_user_data_pack(g_main_context_ref(context),
                                                     g_steal_pointer(&context_busy_watcher)),
                             NULL);
    g_source_attach(cleanup_source, context);
    g_source_unref(cleanup_source);
}
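/* For reference: a plain-GLib sketch of what the nm_g_idle_source_new() plus
 * g_source_attach() combination above accomplishes, and why g_idle_add() would
 * be wrong here: g_idle_add() always attaches the source to the default main
 * context, never to an explicitly chosen one. The helper name is hypothetical. */
static void
_schedule_cleanup_idle(GMainContext *context, GSourceFunc func, gpointer user_data)
{
    GSource *source;

    source = g_idle_source_new();
    g_source_set_priority(source, G_PRIORITY_LOW + 10);
    g_source_set_callback(source, func, user_data, NULL);
    g_source_attach(source, context);
    g_source_unref(source);
}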

/*****************************************************************************/

static void
_init_start_complete(NMClient *self, GError *error_take)
{
    gs_unref_object NMClient *self_keep_alive = g_object_ref(self);
    NMClientPrivate          *priv            = NM_CLIENT_GET_PRIVATE(self);
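    /* Holding a reference on @self ("self_keep_alive") ensures the instance
     * stays alive for the remainder of this function. */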
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check the RSS size in
    `ps aux` (a scripted sketch of the measurement follows below).
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
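For reference, a minimal sketch of how this snapshot can be scripted (the
helper name, the 5-second settle time and discarding stdout are assumptions,
not part of the original measurement):
```
do_rss_snapshot() {
    # Start the monitor in the background and give the client time to
    # load the full object cache.
    nmcli monitor 1>/dev/null &
    local MONITOR_PID=$!
    sleep 5
    # RSS in KiB, the same column that `ps aux` reports.
    ps -o rss= -p "$MONITOR_PID"
    kill "$MONITOR_PID"
}
do_rss_snapshot
```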
2019-10-30 11:42:58 +01:00
|
|
|
NML_NMCLIENT_LOG_D(
|
|
|
|
|
self,
|
|
|
|
|
"%s init complete with %s%s%s",
|
2019-12-18 17:12:45 +01:00
|
|
|
priv->init_data->is_sync ? "sync" : "async",
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
NM_PRINT_FMT_QUOTED(error_take, "error: ", error_take->message, "", "success"));
|
2016-10-18 16:35:07 +02:00
|
|
|
|
2022-10-06 10:13:14 +02:00
|
|
|
priv->instance_flags |= (error_take ? NM_CLIENT_INSTANCE_FLAGS_INITIALIZED_BAD
|
|
|
|
|
: NM_CLIENT_INSTANCE_FLAGS_INITIALIZED_GOOD);
|
|
|
|
|
|
|
|
|
|
_notify(self, PROP_INSTANCE_FLAGS);
|
|
|
|
|
|
2019-12-18 17:12:45 +01:00
|
|
|
nml_init_data_return(g_steal_pointer(&priv->init_data), error_take);
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
_init_start_check_complete(NMClient *self)
|
|
|
|
|
{
|
|
|
|
|
NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
|
|
|
|
|
|
|
|
|
|
_assert_main_context_is_current_thread_default(self, main_context);
|
2016-10-18 16:35:07 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
if (!priv->init_data)
|
|
|
|
|
return;
|
2016-10-18 16:35:07 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as in N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as in N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    if (priv->get_managed_objects_cancellable) {
        /* still initializing. Wait. */
        return;
    }
#if NM_MORE_ASSERTS > 10
    {
        NMLDBusObject *dbobj;
        c_list_for_each_entry (dbobj,
                               &priv->dbus_objects_lst_head_with_nmobj_not_ready,
                               dbus_objects_lst) {
            NML_NMCLIENT_LOG_T(self, "init-start waiting for %s", dbobj->dbus_path->str);
            break;
        }
    }
#endif

    if (!c_list_is_empty(&priv->dbus_objects_lst_head_with_nmobj_not_ready))
        return;
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions
(see the sketch below).
Note that such "assertion failures" might also happen because of a server
bug (or change). Hence these are not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
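A minimal sketch of this logging pattern (illustrative, not libnm's internal
logging code; client_log() is a hypothetical helper):
```c
/* Hedged sketch of the described pattern: debug-level by default,
 * promoted to g_warning() on request so that G_DEBUG=fatal-warnings
 * turns the message into a hard failure. */
#include <glib.h>
#include <string.h>

static void
client_log(const char *msg)
{
    const char *dbg = g_getenv("LIBNM_CLIENT_DEBUG");

    if (dbg == NULL)
        return;                 /* logging disabled by default */
    if (strstr(dbg, "warning"))
        g_warning("%s", msg);   /* fatal under G_DEBUG=fatal-warnings */
    else if (strstr(dbg, "trace"))
        g_debug("%s", msg);
}
```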
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to first emit
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes the behavior of commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me (see the client-side sketch below).
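A hedged client-side sketch: since the cache is committed before signals are
emitted, a "device-removed" handler already sees the post-removal device list.
The handler and main-loop boilerplate here are illustrative, not part of this
patch:
```c
/* Illustrative client: by the time "device-removed" fires,
 * nm_client_get_devices() no longer contains the removed device. */
#include <NetworkManager.h>

static void
on_device_removed(NMClient *client, NMDevice *device, gpointer user_data)
{
    const GPtrArray *devices = nm_client_get_devices(client);

    g_print("removed %s; %u devices remain\n",
            nm_device_get_iface(device), devices->len);
}

int
main(void)
{
    GError    *error  = NULL;
    NMClient  *client = nm_client_new(NULL, &error);
    GMainLoop *loop;

    if (!client) {
        g_printerr("cannot create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }
    g_signal_connect(client, "device-removed",
                     G_CALLBACK(on_device_removed), NULL);
    loop = g_main_loop_new(NULL, FALSE);
    g_main_loop_run(loop);
    return 0;
}
```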
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". NMClient hides such profiles from
nm_client_get_connections() and from its "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved (see the
sketch below).
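A hedged sketch of that distinction (show_active_profiles() is a hypothetical
helper; the libnm calls are public API):
```c
/* Illustrative only: a hidden profile never appears in
 * nm_client_get_connections(), but when it is active,
 * nm_active_connection_get_connection() still returns it. */
#include <NetworkManager.h>

static void
show_active_profiles(NMClient *client)
{
    const GPtrArray *active = nm_client_get_active_connections(client);
    guint i;

    for (i = 0; i < active->len; i++) {
        NMActiveConnection *ac = active->pdata[i];
        NMRemoteConnection *rc = nm_active_connection_get_connection(ac);

        /* 'rc' may be a profile that nm_client_get_connections() hides */
        if (rc)
            g_print("active: %s\n",
                    nm_connection_get_id(NM_CONNECTION(rc)));
    }
}
```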
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
_init_start_complete(self, NULL);
|
2016-10-18 16:35:07 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
2019-10-30 11:42:58 +01:00
|
|
|
_init_start_cancelled_cb(GCancellable *cancellable, gpointer user_data)
|
2016-10-18 16:35:07 +02:00
|
|
|
{
|
libnm: fix crash during restart after being killed
When the cloud-init job (the metadata service crawler) starts, it sends
SIGTERM to nm-cloud-setup and forces nm-cloud-setup to
restart. However, because the error variable is not initialized to NULL in
`_init_start_cancelled_cb()` before it is set, nm-cloud-setup
dumps core.
To fix it, initialize the error to NULL in `_init_start_cancelled_cb()`.
https://bugzilla.redhat.com/show_bug.cgi?id=2027674
Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')
Backtrace:
#0 g_logv (log_domain=0x7f833a872071 "GLib", log_level=G_LOG_LEVEL_WARNING, format=<optimized out>, args=<optimized out>) at ../glib/gmessages.c:1413
#1 0x00007f833a81f043 in g_log (log_domain=<optimized out>, log_level=<optimized out>, format=<optimized out>) at ../glib/gmessages.c:1451
#2 0x00007f833ab97230 in nm_utils_error_set_cancelled (is_disposing=<optimized out>, instance_name=<optimized out>, error=0x7ffff79cb980) at src/libnm-glib-aux/nm-shared-utils.c:2599
#3 nm_utils_error_set_cancelled (is_disposing=0, instance_name=0x0, error=0x7ffff79cb980) at src/libnm-glib-aux/nm-shared-utils.c:2590
#4 _init_start_cancelled_cb (cancellable=<optimized out>, user_data=0x5640ca292150) at src/libnm-client-impl/nm-client.c:7324
#5 _init_start_cancelled_cb (cancellable=<optimized out>, user_data=0x5640ca292150) at src/libnm-client-impl/nm-client.c:7307
#6 0x00007f833a93094a in _g_closure_invoke_va (param_types=0x0, n_params=<optimized out>, args=0x7ffff79cbb40, instance=0x5640ca267020, return_value=0x0, closure=0x5640ca29d430)
at ../gobject/gclosure.c:873
#7 g_signal_emit_valist (instance=0x5640ca267020, signal_id=<optimized out>, detail=0, var_args=var_args@entry=0x7ffff79cbb40) at ../gobject/gsignal.c:3406
#8 0x00007f833a930a93 in g_signal_emit (instance=instance@entry=0x5640ca267020, signal_id=<optimized out>, detail=detail@entry=0) at ../gobject/gsignal.c:3553
#9 0x00007f833a9a6475 in g_cancellable_cancel (cancellable=0x5640ca267020) at ../gio/gcancellable.c:513
#10 g_cancellable_cancel (cancellable=0x5640ca267020) at ../gio/gcancellable.c:487
#11 0x00005640ca1a8bd4 in sigterm_handler (user_data=0x5640ca267020) at src/nm-cloud-setup/main.c:599
#12 0x00007f833a819d4f in g_main_dispatch (context=0x5640ca268ef0) at ../glib/gmain.c:3337
#13 g_main_context_dispatch (context=0x5640ca268ef0) at ../glib/gmain.c:4055
#14 0x00007f833a86e608 in g_main_context_iterate.constprop.0 (context=0x5640ca268ef0, block=block@entry=1, dispatch=dispatch@entry=1, self=<optimized out>) at ../glib/gmain.c:4131
#15 0x00007f833a819463 in g_main_loop_run (loop=0x5640ca24fdb0) at ../glib/gmain.c:4329
#16 0x00005640ca1a6d04 in nmc_client_new_waitsync (cancellable=0x5640ca267020, out_nmc=0x7ffff79cbfa0, error=0x7ffff79cbf98, first_property_name=0x5640ca1b11db "instance-flags",
first_property_name=0x5640ca1b11db "instance-flags") at src/libnm-client-aux-extern/nm-libnm-aux.c:129
#17 0x00005640ca1a3863 in main (argc=1, argv=<optimized out>) at src/nm-cloud-setup/main.c:639
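For context, a minimal illustration of this bug class (not the actual
nm-cloud-setup code; set_cancelled() and the error domain are hypothetical):
g_set_error() inspects the existing value behind the out-pointer, so an
uninitialized GError* holding stack garbage ends up on the g_logv() path
seen in the backtrace above.
```c
/* Hedged illustration of the fixed bug class. g_set_error() warns (and
 * under G_DEBUG=fatal-warnings, aborts) if *error already looks
 * non-NULL -- which includes uninitialized stack garbage. */
#include <glib.h>

static void
set_cancelled(GError **error)
{
    g_set_error(error, g_quark_from_static_string("example-quark"),
                1, "Operation was cancelled");
}

int
main(void)
{
    GError *error = NULL; /* the fix: always start from NULL */

    set_cancelled(&error);
    g_print("%s\n", error->message);
    g_clear_error(&error);
    return 0;
}
```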
2022-03-08 15:58:38 -05:00
|
|
|
NMClient *self = user_data;
|
|
|
|
|
NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
|
|
|
|
|
GError *error = NULL;
|
2016-10-18 16:35:07 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
nm_assert(NM_IS_CLIENT(self));
|
2021-03-29 09:06:50 +02:00
|
|
|
nm_assert(priv->init_data);
|
|
|
|
|
nm_assert(priv->init_data->cancellable == cancellable);
|
|
|
|
|
|
|
|
|
|
if (priv->init_data->cancelled_id == 0) {
|
|
|
|
|
/* this can only happen if the cancellable was already cancelled initially.
|
|
|
|
|
* In this case we are called synchronously. Do nothing, and let the caller
|
|
|
|
|
* handle it. */
|
|
|
|
|
return;
|
|
|
|
|
}
|
2019-10-30 11:42:58 +01:00
|
|
|
|
|
|
|
|
nm_utils_error_set_cancelled(&error, FALSE, NULL);
|
|
|
|
|
_init_start_complete(self, error);
|
2016-10-18 16:35:07 +02:00
|
|
|
}
|
|
|
|
|
|
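
/* Idle callback scheduled when initialization starts with an already
 * cancelled GCancellable (see _init_start_with_bus() below): complete
 * the initialization with a "cancelled" error from the main context,
 * rather than synchronously. */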
static gboolean
_init_start_cancel_on_idle_cb(gpointer user_data)
{
    NMClient *self = user_data;
    GError *error = NULL;
    nm_utils_error_set_cancelled(&error, FALSE, NULL);
    _init_start_complete(self, error);
    return G_SOURCE_CONTINUE;
}
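
/* Presumably invoked once a D-Bus connection is available (per the
 * function name): wires up cancellation handling for the caller's
 * GCancellable before initialization proceeds. */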
static void
_init_start_with_bus(NMClient *self)
{
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check the RSS column
    in `ps aux`.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    if (priv->init_data->cancellable) {
        gulong id;

        id = g_cancellable_connect(priv->init_data->cancellable,
                                   G_CALLBACK(_init_start_cancelled_cb),
                                   self,
                                   NULL);
        if (id == 0) {
            /* The cancellable was already cancelled and no handler was
             * installed. Complete the cancellation from an idle source
             * instead of synchronously from within initialization. */
            priv->init_data->cancel_on_idle_source =
                nm_g_idle_source_new(G_PRIORITY_DEFAULT_IDLE,
                                     _init_start_cancel_on_idle_cb,
                                     self,
                                     NULL);
            g_source_attach(priv->init_data->cancel_on_idle_source, priv->main_context);
            return;
        }

        priv->init_data->cancelled_id = id;
    }
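The hunk above relies on the documented GLib behavior that g_cancellable_connect()
returns 0 (and installs no handler) when the cancellable is already cancelled.
A minimal self-contained sketch of the same pattern, with illustrative names
(on_cancelled, cancel_on_idle_cb and watch_cancellable are not the libnm
internals):
```c
#include <gio/gio.h>

static void
on_cancelled(GCancellable *cancellable, gpointer user_data)
{
    /* Normal path: cancellation arrives while the operation is pending. */
}

static gboolean
cancel_on_idle_cb(gpointer user_data)
{
    /* Already-cancelled path: complete the operation as cancelled, but
     * only from the main loop, never synchronously from the setup call. */
    return G_SOURCE_REMOVE;
}

static void
watch_cancellable(GCancellable *cancellable, GMainContext *context, gpointer self)
{
    gulong id;

    id = g_cancellable_connect(cancellable, G_CALLBACK(on_cancelled), self, NULL);
    if (id == 0) {
        /* @cancellable was already cancelled; defer completion to an
         * idle source attached to the right context. */
        GSource *source = g_idle_source_new();

        g_source_set_callback(source, cancel_on_idle_cb, self, NULL);
        g_source_attach(source, context);
        g_source_unref(source);
    }
}
```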
    _assert_main_context_is_current_thread_default(self, dbus_context);

    priv->name_owner_changed_id =
        nm_dbus_connection_signal_subscribe_name_owner_changed(priv->dbus_connection,
                                                               NM_DBUS_SERVICE,
                                                               name_owner_changed_cb,
                                                               self,
                                                               NULL);
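This helper presumably wraps a plain GDBus subscription to the NameOwnerChanged
signal of org.freedesktop.DBus, filtered on the watched bus name via arg0
matching. A hedged sketch of the roughly equivalent direct call (the wrapper's
exact flags are an assumption):
```c
/* Hedged sketch: what the helper above presumably expands to. */
guint subscription_id;

subscription_id = g_dbus_connection_signal_subscribe(dbus_connection,
                                                     "org.freedesktop.DBus",  /* sender */
                                                     "org.freedesktop.DBus",  /* interface */
                                                     "NameOwnerChanged",      /* member */
                                                     "/org/freedesktop/DBus", /* object path */
                                                     NM_DBUS_SERVICE,         /* arg0: watched name */
                                                     G_DBUS_SIGNAL_FLAGS_NONE,
                                                     name_owner_changed_cb,
                                                     self,
                                                     NULL);
```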
libnm: fix leaking internal GMainContext for synchronously initialized NMClient
NMClient makes asynchronous D-Bus calls via g_dbus_connection_call().
This references the current GMainContext to later invoke the
asynchronous callback. Even when we cancel the asynchronous call,
the callback will still be invoked (later) to complete the request.
In particular, this means that when we destroy (unref) an NMClient, there
are quite possibly still pending requests in the GMainContext. Although they
are cancelled, they keep the GMainContext alive.
With synchronous initialization, we have an internal GMainContext.
When we destroy the NMClient, we cannot just unhook the integrated
source; instead, we need to keep it integrated in the caller's main
context as long as there are pending requests.
Add a mechanism to track those pending requests and fix the leak for the
internal GMainContext. Also expose the same mechanism to the user via a new
API called nm_client_get_context_busy_watcher(). This allows users
to know when they can stop iterating the main context and when all
resources have been reclaimed.
For example, the following will lead to a crash:
    for i in range(1, 2000):
        nmc = NM.Client.new(None)
This creates a number of NMClient instances and destroys them again.
Note that here the GMainContext is never iterated, because
synchronous initialization does not iterate the caller's context. So,
while we correctly unref and dispose the created NMClient instances,
there are pending requests left in the inner GMainContext. These pile
up and soon the program will crash because it runs out of file descriptors.
We can have a similar problem with asynchronous initialization, when
we create a new GMainContext per client, and don't iterate it after
we are done with the client.
Note that this patch does not avoid the problem in general. The problem
cannot be avoided: the user must iterate the main context at some point,
otherwise resources (memory and file descriptors) will be leaked.
Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/merge_requests/347
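A minimal sketch of how a caller might use the new API, assuming libnm's
nm_client_get_main_context() and nm_client_get_context_busy_watcher() (the
helper name drain_client is illustrative): take a weak reference on the busy
watcher, drop the client, and iterate the context until the watcher is
disposed.
```c
#include <NetworkManager.h>

static void
watcher_gone_cb(gpointer data, GObject *where_the_object_was)
{
    *((gboolean *) data) = TRUE;
}

/* Illustrative helper: release @client, then iterate its context until all
 * pending (cancelled) requests completed and released their resources. */
static void
drain_client(NMClient *client)
{
    gboolean      gone    = FALSE;
    GMainContext *context = g_main_context_ref(nm_client_get_main_context(client));
    GObject      *watcher = nm_client_get_context_busy_watcher(client);

    g_object_weak_ref(watcher, watcher_gone_cb, &gone);
    g_object_unref(client);

    /* Keep iterating until the watcher went away, i.e. until nothing in
     * the client's cache still references the context. */
    while (!gone)
        g_main_context_iteration(context, TRUE);

    g_main_context_unref(context);
}
```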
    name_owner_get_call(self);
}

static void
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously, GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, they prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify:devices", and finally "device-added"
signals. (See the signal-ordering example after this list.)
This changes the behavior of commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden by NMClient's
nm_client_get_connections() and its "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved. (A short
listing example follows after this list.)
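The three-phase processing described above is the core of the design. Below is a
minimal, self-contained sketch of the pattern, not libnm's actual code; every
type and function name in it is hypothetical. Changes are first recorded, then
applied to the cache without notifying, and only then are the collected
notifications emitted, so observers always see a consistent cache.
```
/* Sketch of the three-phase pattern (hypothetical names, plain GLib).
 * Build: gcc demo.c $(pkg-config --cflags --libs glib-2.0) */
#include <glib.h>

typedef struct {
    char     *property;
    GVariant *value;
} PendingChange;

typedef struct {
    GHashTable *properties; /* property name -> GVariant, the "cache" */
    GPtrArray  *pending;    /* PendingChange*, notifications to emit later */
} Obj;

/* Phase 1: unpack the message and only record what changed. */
static void
handle_properties_changed(Obj *obj, const char *prop, GVariant *value)
{
    PendingChange *ch = g_new0(PendingChange, 1);

    ch->property = g_strdup(prop);
    ch->value    = g_variant_ref_sink(value);
    g_ptr_array_add(obj->pending, ch);
}

/* Phase 2: apply all recorded changes to the cache, emitting nothing. */
static void
handle_obj_changed(Obj *obj)
{
    guint i;

    for (i = 0; i < obj->pending->len; i++) {
        PendingChange *ch = obj->pending->pdata[i];

        g_hash_table_replace(obj->properties,
                             g_strdup(ch->property),
                             g_variant_ref(ch->value));
    }
}

/* Phase 3: only now, with the cache consistent, emit the notifications. */
static void
handle_changes_commit(Obj *obj)
{
    guint i;

    for (i = 0; i < obj->pending->len; i++) {
        PendingChange *ch = obj->pending->pdata[i];

        g_print("notify: %s\n", ch->property);
        g_free(ch->property);
        g_variant_unref(ch->value);
        g_free(ch);
    }
    g_ptr_array_set_size(obj->pending, 0);
}

int
main(void)
{
    Obj obj = {
        .properties = g_hash_table_new_full(g_str_hash, g_str_equal,
                                            g_free, (GDestroyNotify) g_variant_unref),
        .pending    = g_ptr_array_new(),
    };

    /* A "reply" carrying two changed properties at once: */
    handle_properties_changed(&obj, "Version", g_variant_new_string("1.0"));
    handle_properties_changed(&obj, "State", g_variant_new_uint32(70));
    handle_obj_changed(&obj);    /* cache updated, still silent */
    handle_changes_commit(&obj); /* both signals emitted together */

    g_ptr_array_unref(obj.pending);
    g_hash_table_unref(obj.properties);
    return 0;
}
```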
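For the compile-time checks mentioned in the NMLDBusMetaIface bullet, here is a
sketch of the general technique; the macro and types are hypothetical, not
libnm's real NML_DBUS_META_PROPERTY_INIT_U(). C11's _Generic selects an
association only for the exact field type, so declaring a "u" (guint) property
against a field of any other type fails to compile:
```
#include <glib.h>

typedef struct {
    const char *dbus_name;
    const char *dbus_type;
    gsize       offset;
} MetaProperty;

/* The _Generic() term evaluates to 0 for a guint field and is a compile
 * error for anything else, tying the "u" D-Bus type to the C type. */
#define META_PROPERTY_INIT_U(name, structure, field)                     \
    {                                                                    \
        .dbus_name = (name),                                             \
        .dbus_type = "u",                                                \
        .offset    = G_STRUCT_OFFSET(structure, field)                   \
                     + 0 * _Generic(((structure *) 0)->field, guint: 0), \
    }

typedef struct {
    guint version;
} ExampleState;

/* Static and immutable, as in the bullet above: defined once, never copied. */
static const MetaProperty example_properties[] = {
    META_PROPERTY_INIT_U("Version", ExampleState, version),
    /* Pointing this at a non-guint field would not compile. */
};

int
main(void)
{
    g_print("\"%s\" (D-Bus type \"%s\") at offset %" G_GSIZE_FORMAT "\n",
            example_properties[0].dbus_name,
            example_properties[0].dbus_type,
            example_properties[0].offset);
    return 0;
}
```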
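The runtime side of the same guarantee can be sketched like this (again
hypothetical code, not the actual implementation): before a value received from
D-Bus is accepted into the cache, its wire type is checked against the type the
meta data declares, and mismatches are rejected rather than coerced into the
GObject property:
```
#include <glib.h>

/* Accept a property value only if its D-Bus type matches the expected
 * type string from the meta data (e.g. "u", "s", "ao"). */
static gboolean
accept_property_value(const char *expected_dbus_type, GVariant *value)
{
    if (!g_variant_is_of_type(value, G_VARIANT_TYPE(expected_dbus_type))) {
        g_debug("ignoring value of unexpected type %s (expected %s)",
                g_variant_get_type_string(value), expected_dbus_type);
        return FALSE;
    }
    return TRUE;
}

int
main(void)
{
    /* A server (mis)behaving: sending a string where a "u" is expected. */
    GVariant *v = g_variant_ref_sink(g_variant_new_string("not-a-uint32"));

    g_print("accepted: %s\n", accept_property_value("u", v) ? "yes" : "no");
    g_variant_unref(v);
    return 0;
}
```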
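The new signal ordering can be observed with a minimal client; this uses the
public libnm API, though the handlers themselves are of course just an
illustration. After a device disappears, "device-removed" fires first, then
"notify::devices", and additions come last via "device-added":
```
#include <NetworkManager.h>

static void
on_device_removed(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("device-removed: %s\n", nm_device_get_iface(device));
}

static void
on_devices_notify(GObject *object, GParamSpec *pspec, gpointer user_data)
{
    g_print("notify::devices\n");
}

static void
on_device_added(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("device-added: %s\n", nm_device_get_iface(device));
}

int
main(void)
{
    GError    *error  = NULL;
    NMClient  *client = nm_client_new(NULL, &error);
    GMainLoop *loop;

    if (!client) {
        g_printerr("could not create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    g_signal_connect(client, "device-removed", G_CALLBACK(on_device_removed), NULL);
    g_signal_connect(client, "notify::devices", G_CALLBACK(on_devices_notify), NULL);
    g_signal_connect(client, "device-added", G_CALLBACK(on_device_added), NULL);

    /* Print the signals in arrival order until interrupted. */
    loop = g_main_loop_new(NULL, FALSE);
    g_main_loop_run(loop);
    return 0;
}
```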
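Finally, for the visibility behavior in the last bullet, a short example of
listing what nm_client_get_connections() exposes (public libnm API; the program
itself is just an illustration). Profiles hidden via "connection.permissions"
simply do not appear here, while e.g. nm_active_connection_get_connection() may
still hand them out:
```
#include <NetworkManager.h>

int
main(void)
{
    GError          *error  = NULL;
    NMClient        *client = nm_client_new(NULL, &error);
    const GPtrArray *connections;
    guint            i;

    if (!client) {
        g_printerr("could not create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    /* Only profiles visible to this user are in this list. */
    connections = nm_client_get_connections(client);
    for (i = 0; i < connections->len; i++) {
        NMRemoteConnection *remote = connections->pdata[i];

        g_print("%s\n", nm_connection_get_id(NM_CONNECTION(remote)));
    }

    g_object_unref(client);
    return 0;
}
```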
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
_init_start_bus_get_cb(GObject *source, GAsyncResult *result, gpointer user_data)
|
2014-07-24 08:53:33 -04:00
|
|
|
{
|
2021-11-09 13:28:54 +01:00
|
|
|
NMClient *self = user_data;
|
2019-10-30 11:42:58 +01:00
|
|
|
NMClientPrivate *priv;
|
|
|
|
|
GDBusConnection *dbus_connection;
|
2021-11-09 13:28:54 +01:00
|
|
|
GError *error = NULL;
|
2014-07-24 08:53:33 -04:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
nm_assert(NM_IS_CLIENT(self));
|
2014-07-24 08:53:33 -04:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
dbus_connection = g_bus_get_finish(result, &error);
|
2019-10-12 14:59:23 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate
steps (a toy sketch follows after this list). First NMClient unpacks
the message (e.g. _dbus_handle_properties_changed()) and keeps track of
the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet.
Finally, we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that, for example, during the
initial GetManagedObjects() reply, NMClient receives a large amount of
state at once. But we first apply all the changes to our GObject
instances before emitting any signals. The result is that signals are
always emitted at a moment when the cache is consistent. The
unavoidable downside is that when you receive a property changed
signal, possibly many other properties have changed already and more
signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from the client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should all be for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Previously we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types match (an
illustrative macro follows after this list). It's pretty hard to misuse
them, because misuse won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously, GDBusObjectManager would sometimes emit warnings
(g_log()). Those probably indicated real bugs. In any case, they
prevented us from running CI with G_DEBUG=fatal-warnings, because there
would be just too many unrelated crashes. Now we log debug messages
that can be enabled with "LIBNM_CLIENT_DEBUG=trace". Some of these
messages can also be turned into g_warning()/g_critical() by setting
LIBNM_CLIENT_DEBUG=warning,error. Together with G_DEBUG=fatal-warnings,
this turns them into assertions. Note that such "assertion failures"
might also happen because of a server bug (or change). They are thus
not common assertions that indicate a bug in libnm, and they are not
armed unless explicitly requested. In our CI we should now always run
with LIBNM_CLIENT_DEBUG=warning,error and G_DEBUG=fatal-warnings to
catch bugs. Note that currently NetworkManager has bugs in this regard,
so enabling this will result in assertion failures. That should be
fixed first.
- Note that this changes the order in which we emit "notify:devices"
and "device-added" signals. I think it makes the most sense to emit
first "device-removed", then "notify:devices", and finally
"device-added" signals (see the usage sketch after this list).
This changes the behavior introduced by commit 52ae28f6e5bf ('libnm:
queue added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by
setting "connection.permissions". Such profiles are hidden by
NMClient's nm_client_get_connections() and omitted from its
"connection-added"/"connection-removed" signals (see the usage sketch
after this list).
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
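To make the GMainContext rules above concrete, here is a minimal
sketch. It only uses public libnm/GLib API (nm_client_new(),
g_main_context_push_thread_default()); the worker-thread setup itself
is illustrative and assumes a running NetworkManager, it is not
something libnm prescribes. Compile with `pkg-config --cflags --libs libnm`.
```c
#include <NetworkManager.h>

/* Sketch: drive an NMClient from a private GMainContext on a worker
 * thread. NMClient sticks to the context that is thread-default at
 * construction time, so all of its callbacks fire while this thread
 * iterates that context -- never via the global default context. */
static gpointer
worker_thread(gpointer user_data)
{
    GMainContext *context = g_main_context_new();
    NMClient     *client;
    GError       *error = NULL;

    g_main_context_push_thread_default(context);

    client = nm_client_new(NULL, &error);
    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_clear_error(&error);
        g_main_context_pop_thread_default(context);
        g_main_context_unref(context);
        return NULL;
    }

    /* Property notifications and signals of @client are dispatched
     * from here and nowhere else. The sketch iterates forever. */
    for (;;)
        g_main_context_iteration(context, TRUE);
}

int
main(void)
{
    g_thread_join(g_thread_new("nm-worker", worker_thread, NULL));
    return 0;
}
```
The important part is pushing the context before constructing the
client: NMClient latches onto whatever context is thread-default at
construction.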
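The three-step change handling can be pictured with a toy example. The
types and helpers below are hypothetical stand-ins, not the actual
_dbus_handle_*() implementations; the point is only that nothing is
emitted until the whole change set has been applied:
```c
#include <glib.h>

/* Hypothetical stand-in for one recorded property change. */
typedef struct {
    const char *object_path;
    const char *property;
    int         new_value;
} Change;

static void
apply_change(GHashTable *cache, const Change *change)
{
    /* Step 2: mutate the cached state; no signals yet. */
    g_hash_table_insert(cache,
                        g_strdup_printf("%s.%s", change->object_path, change->property),
                        GINT_TO_POINTER(change->new_value));
}

static void
commit_changes(GPtrArray *pending)
{
    guint i;

    /* Step 3: with the cache fully consistent, notify listeners. */
    for (i = 0; i < pending->len; i++) {
        const Change *change = pending->pdata[i];

        g_print("notify %s: %s changed\n", change->object_path, change->property);
    }
}

int
main(void)
{
    GHashTable *cache   = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
    GPtrArray  *pending = g_ptr_array_new();
    guint       i;

    /* Step 1: pretend we just unpacked these from one D-Bus message. */
    Change changes[] = {
        {"/org/freedesktop/NetworkManager/Devices/1", "State", 100},
        {"/org/freedesktop/NetworkManager/Devices/2", "State", 30},
    };

    for (i = 0; i < G_N_ELEMENTS(changes); i++) {
        apply_change(cache, &changes[i]);
        g_ptr_array_add(pending, &changes[i]);
    }

    commit_changes(pending);

    g_ptr_array_unref(pending);
    g_hash_table_unref(cache);
    return 0;
}
```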
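The compile-time checks can be pictured like this. The macro below is
an illustrative stand-in for NML_DBUS_META_PROPERTY_INIT_U(), showing
just the trick of turning a mismatched member type into a compile
error (the real macro is more thorough):
```c
#include <glib.h>

typedef struct {
    const char *dbus_type;
    gsize       offset;
} MetaProperty;

/* Tie the "u" D-Bus type to the member's C type: if the member is not
 * the size of a guint32, the char[-1] below fails to compile. */
#define META_PROPERTY_INIT_U(structure, member)                           \
    {                                                                     \
        .dbus_type = "u",                                                 \
        .offset    = G_STRUCT_OFFSET(structure, member)                   \
                     + (sizeof(char[(sizeof(((structure *) NULL)->member) \
                                     == sizeof(guint32))                  \
                                        ? 1                               \
                                        : -1])                            \
                        - 1),                                             \
    }

typedef struct {
    guint32 version_id;
} ExampleObject;

static const MetaProperty example_properties[] = {
    META_PROPERTY_INIT_U(ExampleObject, version_id),
};

int
main(void)
{
    g_print("offset: %" G_GSIZE_FORMAT "\n", example_properties[0].offset);
    return 0;
}
```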
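Finally, a small consumer sketch for the last two points. It assumes a
running NetworkManager; run it with LIBNM_CLIENT_DEBUG=trace to also
watch the client's own processing:
```c
#include <NetworkManager.h>

static void
on_device_added(NMClient *client, NMDevice *device, gpointer user_data)
{
    /* For one change set, this fires after "device-removed" and
     * "notify::devices", per the ordering described above. */
    g_print("device added: %s\n", nm_device_get_iface(device));
}

int
main(void)
{
    GError          *error  = NULL;
    NMClient        *client = nm_client_new(NULL, &error);
    const GPtrArray *connections;
    guint            i;

    if (!client) {
        g_printerr("error: %s\n", error->message);
        g_clear_error(&error);
        return 1;
    }

    g_signal_connect(client, "device-added", G_CALLBACK(on_device_added), NULL);

    /* Profiles hidden via "connection.permissions" are not listed here. */
    connections = nm_client_get_connections(client);
    for (i = 0; i < connections->len; i++)
        g_print("connection: %s\n", nm_connection_get_id(connections->pdata[i]));

    g_main_loop_run(g_main_loop_new(NULL, FALSE));
    return 0;
}
```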
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    if (!dbus_connection) {
        _init_start_complete(self, error);
        return;
    }

    priv = NM_CLIENT_GET_PRIVATE(self);

    priv->dbus_connection = dbus_connection;

    _init_start_with_bus(self);

    _notify(self, PROP_DBUS_CONNECTION);
}

static void
_init_start(NMClient *self)
{
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
    NML_NMCLIENT_LOG_D(self,
                       "starting %s initialization...",
                       priv->init_data->is_sync ? "sync" : "async");
    if (!priv->dbus_connection) {
        g_bus_get(_nm_dbus_bus_type(), priv->init_data->cancellable, _init_start_bus_get_cb, self);
        return;
    }
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
|
|
|
|
|
|
|
|
_init_start_with_bus(self);
|
2016-10-18 16:35:07 +02:00
|
|
|
}
|
|
|
|
|
|
2019-10-11 11:58:09 +02:00
|
|
|
/*****************************************************************************/
|
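Before the property getter below, a minimal, self-contained sketch (not part of this file) of how a caller might exercise it. It reads NMClient properties both through g_object_get(), which lands in the get_property() switch that follows, and through the typed getters that switch delegates to; NM_CLIENT_VERSION and NM_CLIENT_STATE are the property-name defines from nm-client.h. Build with `pkg-config --cflags --libs libnm`.
```
#include <NetworkManager.h>

/* Invoked on the client's GMainContext whenever the "state" property
 * changes; get_property() below is what backs the notify machinery. */
static void
on_state_changed(GObject *obj, GParamSpec *pspec, gpointer user_data)
{
    g_print("state: %d\n", (int) nm_client_get_state(NM_CLIENT(obj)));
}

int
main(void)
{
    GError   *error   = NULL;
    NMClient *client  = nm_client_new(NULL, &error);
    char     *version = NULL;

    if (!client) {
        g_printerr("cannot create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    /* Generic form: dispatches through get_property() and copies the
     * string into "version", which the caller must free. */
    g_object_get(client, NM_CLIENT_VERSION, &version, NULL);
    g_print("version: %s\n", version ? version : "(unknown)");
    g_free(version);

    /* Typed form: same data, without the GValue round-trip. */
    g_print("version: %s\n",
            nm_client_get_version(client) ? nm_client_get_version(client)
                                          : "(unknown)");

    g_signal_connect(client, "notify::" NM_CLIENT_STATE,
                     G_CALLBACK(on_state_changed), NULL);
    /* A real program would now iterate a GMainLoop to receive signals. */

    g_object_unref(client);
    return 0;
}
```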
2014-07-24 08:53:33 -04:00
|
|
|
|
|
|
|
|
static void
|
2014-09-24 13:14:48 -04:00
|
|
|
get_property(GObject *object, guint prop_id, GValue *value, GParamSpec *pspec)
|
2014-07-24 08:53:33 -04:00
|
|
|
{
|
2021-11-09 13:28:54 +01:00
|
|
|
NMClient *self = NM_CLIENT(object);
|
2016-10-18 16:35:07 +02:00
|
|
|
NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(object);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2014-07-24 08:53:33 -04:00
|
|
|
switch (prop_id) {
|
2019-12-05 18:17:43 +01:00
|
|
|
case PROP_INSTANCE_FLAGS:
|
|
|
|
|
g_value_set_uint(value, priv->instance_flags);
|
|
|
|
|
break;
|
2019-10-12 14:59:23 +02:00
|
|
|
case PROP_DBUS_CONNECTION:
|
|
|
|
|
g_value_set_object(value, priv->dbus_connection);
|
|
|
|
|
break;
|
2019-10-15 16:08:36 +02:00
|
|
|
case PROP_DBUS_NAME_OWNER:
|
|
|
|
|
g_value_set_string(value, nm_client_get_dbus_name_owner(self));
|
|
|
|
|
break;
|
2016-10-18 16:35:07 +02:00
|
|
|
case PROP_NM_RUNNING:
|
|
|
|
|
g_value_set_boolean(value, nm_client_get_nm_running(self));
|
|
|
|
|
break;
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2016-10-18 16:35:07 +02:00
|
|
|
/* Manager properties. */
|
2014-07-24 08:53:33 -04:00
|
|
|
case PROP_VERSION:
|
2016-10-18 16:35:07 +02:00
|
|
|
g_value_set_string(value, nm_client_get_version(self));
|
|
|
|
|
break;
|
2014-07-24 08:53:33 -04:00
|
|
|
case PROP_STATE:
|
2016-10-18 16:35:07 +02:00
|
|
|
g_value_set_enum(value, nm_client_get_state(self));
|
|
|
|
|
break;
|
2014-07-24 08:53:33 -04:00
|
|
|
case PROP_STARTUP:
|
2016-10-18 16:35:07 +02:00
|
|
|
g_value_set_boolean(value, nm_client_get_startup(self));
|
|
|
|
|
break;
|
2014-07-24 08:53:33 -04:00
|
|
|
case PROP_NETWORKING_ENABLED:
|
2016-10-18 16:35:07 +02:00
|
|
|
g_value_set_boolean(value, nm_client_networking_get_enabled(self));
|
|
|
|
|
break;
|
2014-07-24 08:53:33 -04:00
|
|
|
case PROP_WIRELESS_ENABLED:
|
2016-10-18 16:35:07 +02:00
|
|
|
g_value_set_boolean(value, nm_client_wireless_get_enabled(self));
|
|
|
|
|
break;
|
2014-07-24 08:53:33 -04:00
|
|
|
case PROP_WIRELESS_HARDWARE_ENABLED:
|
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
|
|
|
g_value_set_boolean(value, nm_client_wireless_hardware_get_enabled(self));
|
2016-10-18 16:35:07 +02:00
|
|
|
break;
|
2022-03-21 10:20:11 +01:00
|
|
|
case PROP_RADIO_FLAGS:
|
|
|
|
|
g_value_set_uint(value, priv->nm.radio_flags);
|
|
|
|
|
break;
|
2014-07-24 08:53:33 -04:00
|
|
|
case PROP_WWAN_ENABLED:
|
2016-10-18 16:35:07 +02:00
|
|
|
g_value_set_boolean(value, nm_client_wwan_get_enabled(self));
|
|
|
|
|
break;
|
2014-07-24 08:53:33 -04:00
|
|
|
case PROP_WWAN_HARDWARE_ENABLED:
|
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
|
|
|
g_value_set_boolean(value, nm_client_wwan_hardware_get_enabled(self));
|
2016-10-18 16:35:07 +02:00
|
|
|
break;
|
2014-07-24 08:53:33 -04:00
|
|
|
case PROP_WIMAX_ENABLED:
|
2019-10-29 20:05:02 +01:00
|
|
|
g_value_set_boolean(value, FALSE);
|
2016-10-18 16:35:07 +02:00
|
|
|
break;
|
2014-07-24 08:53:33 -04:00
|
|
|
case PROP_WIMAX_HARDWARE_ENABLED:
|
2019-10-29 20:05:02 +01:00
|
|
|
g_value_set_boolean(value, FALSE);
|
2016-10-18 16:35:07 +02:00
|
|
|
break;
|
2014-07-24 08:53:33 -04:00
|
|
|
case PROP_ACTIVE_CONNECTIONS:
|
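/* Presumably _nm_utils_copy_object_array() returns a new GPtrArray that
 * holds a reference on each element; g_value_take_boxed() then transfers
 * ownership of that array to the caller's GValue, so the cache's internal
 * list is never handed out directly. */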
2016-10-18 16:35:07 +02:00
|
|
|
g_value_take_boxed(value,
|
|
|
|
|
_nm_utils_copy_object_array(nm_client_get_active_connections(self)));
|
|
|
|
|
break;
|
2014-07-24 08:53:33 -04:00
|
|
|
case PROP_CONNECTIVITY:
|
2016-10-18 16:35:07 +02:00
|
|
|
g_value_set_enum(value, nm_client_get_connectivity(self));
|
|
|
|
|
break;
|
2017-08-09 15:21:28 +08:00
|
|
|
case PROP_CONNECTIVITY_CHECK_AVAILABLE:
|
|
|
|
|
g_value_set_boolean(value, nm_client_connectivity_check_get_available(self));
|
|
|
|
|
break;
|
|
|
|
|
case PROP_CONNECTIVITY_CHECK_ENABLED:
|
|
|
|
|
g_value_set_boolean(value, nm_client_connectivity_check_get_enabled(self));
|
|
|
|
|
break;
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
case PROP_CONNECTIVITY_CHECK_URI:
|
|
|
|
|
g_value_set_string(value, nm_client_connectivity_check_get_uri(self));
|
|
|
|
|
break;
|
2014-07-24 08:53:33 -04:00
|
|
|
case PROP_PRIMARY_CONNECTION:
|
2016-10-18 16:35:07 +02:00
|
|
|
g_value_set_object(value, nm_client_get_primary_connection(self));
|
|
|
|
|
break;
|
2014-07-24 08:53:33 -04:00
|
|
|
case PROP_ACTIVATING_CONNECTION:
|
2016-10-18 16:35:07 +02:00
|
|
|
g_value_set_object(value, nm_client_get_activating_connection(self));
|
|
|
|
|
break;
|
2014-07-24 08:53:33 -04:00
|
|
|
case PROP_DEVICES:
|
2016-10-18 16:35:07 +02:00
|
|
|
g_value_take_boxed(value, _nm_utils_copy_object_array(nm_client_get_devices(self)));
|
|
|
|
|
break;
|
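A hedged usage note for the PROP_DEVICES case above (a sketch;
NM_CLIENT_DEVICES is libnm's property-name macro): reading the property
through g_object_get() goes through this get_property code and yields a
copy that the caller owns, while the direct getter borrows the client's
internal array.
```
#include <NetworkManager.h>

static void
dump_device_count(NMClient *client)
{
    const GPtrArray *borrowed = nm_client_get_devices(client); /* owned by client */
    GPtrArray       *copy = NULL;

    /* Ends up in the PROP_DEVICES case above; we must unref the copy. */
    g_object_get(client, NM_CLIENT_DEVICES, &copy, NULL);
    g_print("%u devices (copy: %u)\n", borrowed->len, copy ? copy->len : 0u);
    g_clear_pointer(&copy, g_ptr_array_unref);
}
```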
2015-06-03 09:15:24 +02:00
|
|
|
case PROP_METERED:
|
2019-10-30 11:42:58 +01:00
|
|
|
g_value_set_uint(value, nm_client_get_metered(self));
|
2016-10-18 16:35:07 +02:00
|
|
|
break;
|
2014-10-10 14:28:49 -05:00
|
|
|
case PROP_ALL_DEVICES:
|
2016-10-18 16:35:07 +02:00
|
|
|
g_value_take_boxed(value, _nm_utils_copy_object_array(nm_client_get_all_devices(self)));
|
2014-07-24 08:53:33 -04:00
|
|
|
break;
|
2017-10-21 16:05:19 +02:00
|
|
|
case PROP_CHECKPOINTS:
|
2019-10-30 11:42:58 +01:00
|
|
|
g_value_take_boxed(value, _nm_utils_copy_object_array(nm_client_get_checkpoints(self)));
|
2017-10-21 16:05:19 +02:00
|
|
|
break;
|
core: add "VersionInfo" property on D-Bus and NMClient
This exposes NM_VERSION as a number (in contrast to "Version", which is a
string). That is particularly useful because, thanks to the encoding of
the version, the number can be compared with <>.
While at it, don't make it a single number. Expose an array of numbers,
where the subsequent numbers are a bitfield of capabilities.
Note that before commit 3c67a1ec5e8e ('cli: remove version check against
NM'), we used to parse the "Version" string to detect the version. As
such, the information that "VersionInfo" now exposes was already
(somewhat) available; you just had to parse the string. The main benefit of
"VersionInfo" is that it can expose capabilities (patched behavior) in
a lightweight bitfield. Including the numerical version there is just
useful on top.
Currently no additional capabilities are exposed. The idea is of course
to have a place in the future where we can expose additional
capabilities. Adding a capability flag is most useful for behavior that we
backport to older branches. Otherwise, we could just check the daemon version
alone. But since we only add the "VersionInfo" property now, we cannot backport
any capability further back than this, because the "VersionInfo" property itself
won't be backported. As such, this will only be useful in the future, by having
a place where we can add (and backport) capabilities.
Note that there is some overlap with the existing "Capability" property
and the NMCapability enum. The difference is that adding a capability via
"VersionInfo" costs only one bit, and is thus cheaper. Most importantly,
being cheaper means the downsides of adding a capability flag are
significantly reduced. In practice, we could live without capabilities for
a long time, so they must be very cheap to be worth adding. Another
difference might be that "VersionInfo" is about compile-time defaults (e.g.
whether a certain patch/behavior is in), while NM_CAPABILITY_TEAM depends on
whether the team plugin is loaded at runtime.
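As a consumer-side illustration (a sketch on top of this commit, not part
of it; NM_ENCODE_VERSION is libnm's version-encoding macro and the 1.44
example threshold is arbitrary):
```
#include <NetworkManager.h>

static gboolean
daemon_at_least_1_44(NMClient *client)
{
    gsize          len  = 0;
    const guint32 *info = nm_client_get_version_info(client, &len);

    if (!info || len < 1)
        return FALSE; /* daemon too old to expose "VersionInfo" */

    /* Element 0 is the encoded NM_VERSION; any later elements are
     * capability bitfields. */
    return info[0] >= NM_ENCODE_VERSION(1, 44, 0);
}
```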
2022-12-13 12:25:04 +01:00
|
|
|
case PROP_VERSION_INFO:
|
2019-12-20 18:10:55 +01:00
|
|
|
case PROP_CAPABILITIES:
|
|
|
|
|
{
|
|
|
|
|
const guint32 *arr;
|
2021-11-09 13:28:54 +01:00
|
|
|
GArray *out;
|
2019-12-20 18:10:55 +01:00
|
|
|
gsize len;
|
2020-09-28 16:03:33 +02:00
|
|
|
|
core: add "VersionInfo" property on D-Bus and NMClient
This exposes NM_VERSION as number (contrary to the "Version", which is a
string). That is in particular useful, because the number can be
compared with <> due to the encoding of the version.
While at it, don't make it a single number. Expose an array of numbers,
where the following numbers are a bitfield of capabilities.
Note that before commit 3c67a1ec5e8e ('cli: remove version check against
NM'), we used to parse the "Version" string to detect the version. As
such, the information that "VersionInfo" exposes now, was already
(somewhat) available, you just had to parse the string. The main benefit of
"VersionInfo" is that it can expose capabilities (patched behavior) in
in a lightweight bitfield. To include the numerical version there is
just useful on top.
Currently no additional capabilities are exposed. The idea is of course
to have a place in the future, where we can expose additional
capabilities. Adding a capability flag is most useful for behavior that we
backport to older branches. Otherwise, we could just check the daemon version
alone. But since we only add "VersionInfo" property only now, we cannot backport
any capability further than this, because the "VersionInfo" property itself
won't be backported. As such, this will only be useful in the future by having
a place where we can add (and backport) capabilities.
Note that there is some overlap with the existing "Capability" property
and NMCapability enum. The difference is that adding a capability via "VersionInfo"
is only one bit, and thus cheaper. Most importantly, having it cheaper means
the downsides of adding a capability flag is significantly removed. In
practice, we could live without capabilities for a long time, so they
must be very cheap for them to be worth to add. Another difference might be,
that we will want that the VersionInfo is about compile time defaults (e.g.
a certain patch/behavior that is in or not), while NM_CAPABILITY_TEAM depends on
whether the team plugin is loaded at runtime.
2022-12-13 12:25:04 +01:00
|
|
|
arr = (prop_id == PROP_VERSION_INFO) ? nm_client_get_version_info(self, &len)
|
|
|
|
|
: nm_client_get_capabilities(self, &len);
|
2019-12-20 18:10:55 +01:00
|
|
|
if (arr) {
|
|
|
|
|
out = g_array_new(TRUE, FALSE, sizeof(guint32));
|
|
|
|
|
g_array_append_vals(out, arr, len);
|
|
|
|
|
} else
|
|
|
|
|
out = NULL;
|
|
|
|
|
g_value_take_boxed(value, out);
|
|
|
|
|
} break;
|
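A matching usage sketch for the boxing above (assuming the GObject
property names are "version-info" and "capabilities", and that the boxed
type is a GArray of guint32, as the code suggests):
```
#include <NetworkManager.h>

static void
dump_version_info(NMClient *client)
{
    GArray *arr = NULL;
    guint   i;

    /* g_object_get() on a boxed property hands us a copy that we own. */
    g_object_get(client, "version-info", &arr, NULL);
    if (arr) {
        for (i = 0; i < arr->len; i++)
            g_print("word[%u] = 0x%x\n", i, g_array_index(arr, guint32, i));
        g_array_unref(arr);
    }
}
```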
2019-12-05 15:53:51 +01:00
|
|
|
case PROP_PERMISSIONS_STATE:
|
|
|
|
|
g_value_set_enum(value, priv->permissions_state);
|
|
|
|
|
break;
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2016-10-18 16:35:07 +02:00
|
|
|
/* Settings properties. */
|
2014-09-29 10:58:16 -04:00
|
|
|
case PROP_CONNECTIONS:
|
2019-10-30 11:42:58 +01:00
    g_value_take_boxed(value, _nm_utils_copy_object_array(nm_client_get_connections(self)));
2016-10-18 16:35:07 +02:00
    break;
2014-09-29 10:58:16 -04:00
case PROP_HOSTNAME:
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectManagerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity holds, each reader must judge for themselves. Note
that it is still fairly complex.
- There was a lingering performance issue with a large number of D-Bus
objects. The patch tries hard to make the implementation scale well. Of
course, when we cache N objects that reference each other,
we are still fundamentally O(N*M) in runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each NMClient is only accessed through the caller's
GMainContext. This follows glib's style and effectively allows using NMClient
in multi-threaded scenarios. It implies sticking to one main context,
chosen upon construction, and ensuring that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context. A sketch of the pattern follows below.
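As an illustration of that rule, here is a minimal sketch (not libnm's actual internals; the Client type and its context field are invented) of capturing the thread-default GMainContext at construction and scheduling every callback there instead of via g_idle_add():
```c
#include <glib.h>

typedef struct {
    GMainContext *context; /* invented field: the caller's context, captured at construction */
} Client;

static Client *
client_new(void)
{
    Client *self = g_new0(Client, 1);

    /* remember the thread-default context of the constructing thread */
    self->context = g_main_context_ref_thread_default();
    return self;
}

static void
client_invoke(Client *self, GSourceFunc func, gpointer user_data)
{
    GSource *source = g_idle_source_new();

    g_source_set_callback(source, func, user_data, NULL);
    /* attach to the captured context, never to g_main_context_default() */
    g_source_attach(source, self->context);
    g_source_unref(source);
}
```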
- Get the ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
"nm-object.c" previously tried to combine signals and emit them from an
idle handler. That is wrong: signals must be emitted in the right order,
at the moment they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up with closely related code
in different source files. Spreading related code around does not make it
easier to understand; on the contrary. That means NMClient is
inherently complex, as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that, for example, during the initial
GetManagedObjects() reply, NMClient receives a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted at a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property-changed signal, possibly many other properties have already changed
and more signals are about to be emitted (see the toy sketch below).
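As a toy, self-contained rendering of that unpack/apply/commit flow (all names are invented; this is not libnm's real code):
```c
#include <glib.h>

typedef struct {
    GHashTable *cache;   /* property name -> value, updated in step 2 */
    GPtrArray  *pending; /* property names queued for notification */
} ToyClient;

static void
toy_apply_change(ToyClient *self, const char *prop, const char *value)
{
    /* step 2: update the cached state, but only queue the notification */
    g_hash_table_insert(self->cache, g_strdup(prop), g_strdup(value));
    g_ptr_array_add(self->pending, g_strdup(prop));
}

static void
toy_commit(ToyClient *self)
{
    guint i;

    /* step 3: the cache is consistent now; emit everything that was queued */
    for (i = 0; i < self->pending->len; i++)
        g_print("notify::%s\n", (const char *) g_ptr_array_index(self->pending, i));
    g_ptr_array_set_size(self->pending, 0);
}
```
Only after toy_commit() runs does an observer see any notification, and by then the whole batch of changes has been applied.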
- NMDeviceWifi no longer modifies the content of the cache from the client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what the NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should all be for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping the NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Previously, we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change: you still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and the other meta data are static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them, because misuse won't compile. A sketch of the idea follows below.
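One minimal way to get such a build-time guarantee, sketched with glib's G_STATIC_ASSERT (the struct and field are invented, and this size check is only a stand-in for the real type check in NML_DBUS_META_PROPERTY_INIT_U()):
```c
#include <glib.h>

typedef struct {
    guint32 version_id;
} ExampleProps;

/* Breaks the build if the field's size ever stops matching a D-Bus "u"
 * (guint32). */
G_STATIC_ASSERT(sizeof(((ExampleProps *) NULL)->version_id) == sizeof(guint32));
```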
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type; it didn't know the expected D-Bus type. A sketch of such a type check follows below.
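The kind of check meant here can be illustrated with GVariant's type predicates; the helper below is invented for the example and is not libnm API:
```c
#include <glib.h>

static gboolean
accept_uint32_property(GVariant *value, guint32 *out)
{
    /* reject a property whose D-Bus type is not the expected "u" */
    if (!g_variant_is_of_type(value, G_VARIANT_TYPE_UINT32))
        return FALSE;

    *out = g_variant_get_uint32(value);
    return TRUE;
}
```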
- Previously, GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). They are therefore not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first. A sketch of the gating follows below.
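A sketch of such environment-variable gating (the helper is invented and much simpler than libnm's actual logging code):
```c
#include <glib.h>
#include <string.h>

static void
client_log_event(const char *msg)
{
    const char *mode = g_getenv("LIBNM_CLIENT_DEBUG");

    if (mode && strstr(mode, "warning"))
        g_warning("%s", msg); /* fatal when G_DEBUG=fatal-warnings is set */
    else if (mode && strstr(mode, "trace"))
        g_debug("%s", msg);
}
```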
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and from the "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long a plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_setup
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
    g_value_set_string(value, priv->settings.hostname);
2016-10-18 16:35:07 +02:00
    break;
2014-09-29 10:58:16 -04:00
case PROP_CAN_MODIFY:
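The blamed fragments above and below are pieces of NMClient's get_property() dispatcher. A self-contained miniature of that standard GObject pattern, with an invented object type and a single property:
```c
#include <glib-object.h>

enum { PROP_0, PROP_HOSTNAME };

typedef struct {
    GObject parent;
    char   *hostname;
} Toy;

static void
toy_get_property(GObject *object, guint prop_id, GValue *value, GParamSpec *pspec)
{
    Toy *self = (Toy *) object;

    switch (prop_id) {
    case PROP_HOSTNAME:
        g_value_set_string(value, self->hostname);
        break;
    default:
        G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
        break;
    }
}
```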
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
    g_value_set_boolean(value, priv->settings.can_modify);
2014-09-29 10:58:16 -04:00
    break;
2020-09-28 16:03:33 +02:00
2016-10-25 11:11:12 +02:00
    /* DNS properties */
case PROP_DNS_MODE:
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
    g_value_set_string(value, nm_client_get_dns_mode(self));
    break;
2016-10-25 11:11:12 +02:00
case PROP_DNS_RC_MANAGER:
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli` (the empty PAGER
disables nmcli's output pager):
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check the RSS column
in `ps aux`.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
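For reference, a sketch of how such a sample can be taken non-interactively
(the 5 second sleep is a guess to let the initial GetManagedObjects()
processing settle):
```
nmcli monitor >/dev/null &
sleep 5
ps -o user,pid,vsz,rss,args -C nmcli   # VSZ/RSS are in KiB
kill %1
```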
2019-10-30 11:42:58 +01:00
|
|
|
g_value_set_string(value, nm_client_get_dns_rc_manager(self));
|
2016-10-25 11:11:12 +02:00
|
|
|
break;
|
|
|
|
|
case PROP_DNS_CONFIGURATION:
|
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
|
|
|
g_value_take_boxed(value,
|
|
|
|
|
_nm_utils_copy_array(nm_client_get_dns_configuration(self),
|
|
|
|
|
(NMUtilsCopyFunc) nm_dns_entry_dup,
|
|
|
|
|
(GDestroyNotify) nm_dns_entry_unref));
|
2016-10-25 11:11:12 +02:00
|
|
|
break;
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2014-07-24 08:53:33 -04:00
|
|
|
default:
|
|
|
|
|
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-10-11 11:58:09 +02:00
|
|
|
static void
|
|
|
|
|
set_property(GObject *object, guint prop_id, const GValue *value, GParamSpec *pspec)
|
|
|
|
|
{
|
2021-11-09 13:28:54 +01:00
|
|
|
NMClient *self = NM_CLIENT(object);
|
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
|
|
|
NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
|
|
|
|
|
gboolean b;
|
2019-12-05 18:17:43 +01:00
|
|
|
guint v_uint;
|
|
|
|
|
|
2019-10-11 11:58:09 +02:00
|
|
|
switch (prop_id) {
|
2019-12-05 18:17:43 +01:00
|
|
|
case PROP_INSTANCE_FLAGS:
|
|
|
|
|
/* construct */
|
|
|
|
|
|
|
|
|
|
v_uint = g_value_get_uint(value);
|
2022-10-06 10:13:14 +02:00
|
|
|
|
|
|
|
|
/* Silently ignore "initialized-{good,bad}" flags. They are only set internally
|
|
|
|
|
* and cannot be changed by the user. However, accept the caller setting them,
|
|
|
|
|
* so that
|
|
|
|
|
* nmc.props.instance_flags = nmc.props.instance_flags | NM.ClientInstanceFlags.NO_AUTO_FETCH_PERMISSIONS
|
|
|
|
|
* works. */
|
|
|
|
|
v_uint &= ~((guint) (NM_CLIENT_INSTANCE_FLAGS_INITIALIZED_GOOD
|
|
|
|
|
| NM_CLIENT_INSTANCE_FLAGS_INITIALIZED_BAD));
|
|
|
|
|
|
|
|
|
|
g_return_if_fail(!NM_FLAGS_ANY(v_uint, ~((guint) NM_CLIENT_INSTANCE_FLAGS_ALL_WRITABLE)));
|
|
|
|
|
|
|
|
|
|
v_uint &= ((guint) NM_CLIENT_INSTANCE_FLAGS_ALL_WRITABLE);
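/* At this point v_uint contains only bits from
 * NM_CLIENT_INSTANCE_FLAGS_ALL_WRITABLE: the internal "initialized"
 * bits were masked out above and any other unknown bits were rejected
 * by the g_return_if_fail(). */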
|
2019-12-05 18:17:43 +01:00
|
|
|
|
|
|
|
|
if (!priv->instance_flags_constructed) {
|
|
|
|
|
priv->instance_flags_constructed = TRUE;
|
|
|
|
|
priv->instance_flags = v_uint;
|
|
|
|
|
nm_assert((guint) priv->instance_flags == v_uint);
|
|
|
|
|
} else {
|
2019-12-05 15:53:51 +01:00
|
|
|
NMClientInstanceFlags flags = v_uint;
|
|
|
|
|
|
2025-02-28 18:37:15 +01:00
|
|
|
/* After object construction, we only allow one to toggle certain flags and
|
2019-12-05 15:53:51 +01:00
|
|
|
* ignore all other flags. */
|
|
|
|
|
|
|
|
|
|
if ((priv->instance_flags ^ flags)
|
|
|
|
|
& NM_CLIENT_INSTANCE_FLAGS_NO_AUTO_FETCH_PERMISSIONS) {
|
|
|
|
|
if (NM_FLAGS_HAS(flags, NM_CLIENT_INSTANCE_FLAGS_NO_AUTO_FETCH_PERMISSIONS))
|
|
|
|
|
priv->instance_flags |= NM_CLIENT_INSTANCE_FLAGS_NO_AUTO_FETCH_PERMISSIONS;
|
|
|
|
|
else
|
|
|
|
|
priv->instance_flags &= ~NM_CLIENT_INSTANCE_FLAGS_NO_AUTO_FETCH_PERMISSIONS;
|
|
|
|
|
if (priv->dbsid_nm_check_permissions != 0)
|
|
|
|
|
_dbus_check_permissions_start(self);
|
2019-12-05 18:17:43 +01:00
|
|
|
}
|
2020-09-28 16:03:33 +02:00
|
|
|
}
|
2019-12-05 18:17:43 +01:00
|
|
|
break;
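/* Example (sketch, assuming an initialized NMClient "client"): after
 * construction a caller can still toggle the NO_AUTO_FETCH_PERMISSIONS
 * bit via the GObject property; all non-writable bits are filtered out
 * as described above:
 *
 *   g_object_set(client,
 *                NM_CLIENT_INSTANCE_FLAGS,
 *                (guint) NM_CLIENT_INSTANCE_FLAGS_NO_AUTO_FETCH_PERMISSIONS,
 *                NULL);
 */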
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-12 14:59:23 +02:00
|
|
|
case PROP_DBUS_CONNECTION:
|
|
|
|
|
/* construct-only */
|
|
|
|
|
priv->dbus_connection = g_value_dup_object(value);
|
|
|
|
|
break;
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-11 11:58:09 +02:00
|
|
|
case PROP_NETWORKING_ENABLED:
|
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
|
|
|
b = g_value_get_boolean(value);
|
|
|
|
|
if (priv->nm.networking_enabled != b) {
|
|
|
|
|
nm_client_networking_set_enabled(self, b, NULL);
|
|
|
|
|
/* Let the property value flip when we get the change signal from NM */
|
|
|
|
|
}
|
|
|
|
|
break;
|
2019-10-11 11:58:09 +02:00
|
|
|
case PROP_WIRELESS_ENABLED:
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously, GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). These are therefore not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. Those should be fixed first. (See the example after
this list for how a test program can arm these checks.)
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify:devices", and finally "device-added".
This changes the behavior of commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and from the "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior is preserved. (A sketch
of listing the visible connections follows after this list.)
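The following is a minimal sketch (not the actual libnm code) of the batching
idea described above: apply a whole batch of property changes first, and only
then let the notifications out. It uses plain GObject facilities;
apply_batched_changes() and the property names are hypothetical, and the caller
is assumed to pass an object that actually defines such properties.
```
/* Minimal sketch, not libnm code: apply several property changes while
 * "notify" emission is frozen, so listeners only run once the object is
 * in a consistent state. The property names are hypothetical; the caller
 * must pass an object that actually defines them. */
#include <glib-object.h>

static void
apply_batched_changes(GObject *obj)
{
    /* Steps 1+2: update the instance without emitting any signals yet. */
    g_object_freeze_notify(obj);
    g_object_set(obj,
                 "first-property", 42,
                 "second-property", "new-value",
                 NULL);
    /* Step 3: thawing emits the queued "notify" signals in one go,
     * after the whole batch was applied. */
    g_object_thaw_notify(obj);
}
```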
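As a hypothetical sketch of the compile-time checking technique (all names
below are made up; the real NML_DBUS_META_PROPERTY_INIT_U() is more elaborate):
the macro hard-codes the D-Bus signature "u" and makes the build fail if the C
field does not have the matching size.
```
/* Hypothetical sketch of statically checked property metadata.
 * META_PROPERTY_INIT_U() records the D-Bus type "u" and fails to
 * compile (negative array size) if the C field is not 32 bits wide.
 * A real implementation can also check the exact type, e.g. with
 * __builtin_types_compatible_p(). */
#include <glib.h>
#include <stddef.h>

typedef struct {
    const char *dbus_name;
    const char *dbus_type; /* D-Bus signature, e.g. "u" */
    size_t      offset;    /* field offset in the state struct */
} MetaProperty;

typedef struct {
    guint32 version; /* backs a D-Bus property of type "u" */
} ExampleState;

#define META_PROPERTY_INIT_U(name, strct, field)                     \
    {                                                                \
        .dbus_name = (name),                                         \
        .dbus_type = "u",                                            \
        .offset    = offsetof(strct, field)                          \
                     + 0                                             \
                       * sizeof(char[sizeof(((strct *) NULL)->field) \
                                         == sizeof(guint32)          \
                                     ? 1                             \
                                     : -1])                          \
    }

static const MetaProperty example_properties[] = {
    META_PROPERTY_INIT_U("Version", ExampleState, version),
};
```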
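For example, a test program can arm these checks itself. This is a sketch,
assuming the environment variables are set before libnm or GLib first evaluate
them; exporting them in the shell that launches the test is the more robust
variant.
```
/* Sketch: create an NMClient with libnm debug messages armed as
 * g_warning()/g_critical() and GLib configured to abort on warnings.
 * The variables must be set before the first logging happens. */
#include <NetworkManager.h>

int
main(void)
{
    GError   *error = NULL;
    NMClient *client;

    g_setenv("LIBNM_CLIENT_DEBUG", "warning,error", TRUE);
    g_setenv("G_DEBUG", "fatal-warnings", TRUE);

    client = nm_client_new(NULL, &error);
    if (!client) {
        g_printerr("error creating NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    /* ... exercise the client; any armed warning now aborts ... */

    g_object_unref(client);
    return 0;
}
```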
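A minimal sketch of listing the connections that NMClient exposes; profiles
hidden via "connection.permissions" simply do not appear in this array.
print_visible_connections() is a made-up helper.
```
/* Sketch: iterate the connections visible to this NMClient. Hidden
 * profiles are filtered out here, but may still be reachable through
 * nm_active_connection_get_connection() or
 * nm_device_get_available_connections(). */
#include <NetworkManager.h>

static void
print_visible_connections(NMClient *client)
{
    const GPtrArray *connections = nm_client_get_connections(client);
    guint            i;

    for (i = 0; i < connections->len; i++) {
        NMRemoteConnection *remote = g_ptr_array_index(connections, i);

        g_print("%s\n", nm_connection_get_id(NM_CONNECTION(remote)));
    }
}
```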
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long a plain `nmcli` call takes on such a system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_setup
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
        b = g_value_get_boolean(value);
        if (priv->nm.wireless_enabled != b) {
            nm_client_wireless_set_enabled(self, b);
            /* Let the property value flip when we get the change signal from NM */
        }
        break;
    case PROP_WWAN_ENABLED:
        b = g_value_get_boolean(value);
        if (priv->nm.wwan_enabled != b) {
            nm_client_wwan_set_enabled(self, b);
            /* Let the property value flip when we get the change signal from NM */
        }
        break;
    case PROP_CONNECTIVITY_CHECK_ENABLED:
        b = g_value_get_boolean(value);
        if (priv->nm.connectivity_check_enabled != b) {
            nm_client_connectivity_check_set_enabled(self, b);
            /* Let the property value flip when we get the change signal from NM */
        }
        break;
    case PROP_WIMAX_ENABLED:
        /* WiMAX is no longer supported; the value is accepted for
         * compatibility and ignored. */
        break;
    default:
        G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
        break;
    }
}

/*****************************************************************************/

static gboolean
init_sync(GInitable *initable, GCancellable *cancellable, GError **error)
{
    gs_unref_object NMClient *self = NULL;
    NMClientPrivate *priv;
    GMainContext *dbus_context;
    GError *local_error = NULL;
    GMainLoop *main_loop;
    GObject *parent_context_busy_watcher;
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(initable), FALSE);
self = g_object_ref(NM_CLIENT(initable)); /* keep instance alive. */
priv = NM_CLIENT_GET_PRIVATE(self);
|
2019-10-11 11:58:09 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
g_return_val_if_fail(!priv->dbus_context, FALSE);
/* when using init_sync(), we use a separate internal GMainContext for
 * all D-Bus operations and use our regular async-init code. That means,
 * also in sync-init, we don't actually block waiting for our D-Bus requests,
 * instead, we only block (g_main_loop_run()) for the overall result.
 *
 * Doing this has a performance overhead. Also, we cannot ever fall back
 * to the regular main-context (not unless we lose the main-owner and
 * need to re-initialize). The reason is that we receive events on our
 * dbus_context, and this cannot be brought in sync -- short of fully
 * reinitializing. Therefore, sync init is not only slower during
 * construction of the object, but NMClient will also stick to the dual
 * GMainContext mode.
 *
 * Aside from this downside, the solution is good:
 *
 * - we don't duplicate the implementation of async-init.
 * - we don't iterate the main-context of the caller while waiting for
 *   initialization to happen.
 * - we still invoke all changes under the main_context of the caller.
 * - all D-Bus events strictly go through dbus_context and are in order.
 */
dbus_context = g_main_context_new();
priv->dbus_context = g_main_context_ref(dbus_context);
libnm: fix leaking internal GMainContext for synchronously initialized NMClient
NMClient makes asynchronous D-Bus calls via g_dbus_connection_call().
This references the current GMainContext to later invoke the
asynchronous callback. Even when we cancel the asynchronous call,
the callback will still be invoked (later) to complete the request.
In particular this means when we destroy (unref) an NMClient, there
are quite possibly pending requests in the GMainContext. Although they
are cancelled, they keep the GMainContext alive.
With synchronous initialization, we have an internal GMainContext.
When we destroy the NMClient, we cannot just unhook the integrated
source, instead, we need to keep it integrated in the caller's main
context, as long as there are pending requests.
Add a mechanism to track those pending requests and fix the leak for the
internal GMainContext. Also expose the same mechanism to the user via a new
API called nm_client_get_context_busy_watcher(). This allows the user
to know when it can stop iterating the main context and when all
resources are reclaimed.
For example the following will lead to a crash:
    for i in range(1, 2000):
        nmc = NM.Client.new(None)
This creates a number of NMClient instances and destroys them again.
Note that here the GMainContext is never iterated, because
synchronous initialization does not iterate the caller's context. So,
while we correctly unref and dispose the created NMClient instances,
there are pending requests left in the inner GMainContext. These pile
up and soon the program will crash because it runs out of file descriptors.
We can have a similar problem with asynchronous initialization, when
we create a new GMainContext per client, and don't iterate it after
we are done with the client.
Note that this patch does not avoid the problem in general. The problem
cannot be avoided, the user must iterate the main context at some point.
Otherwise resources (memory and file descriptors) will be leaked.
Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/merge_requests/347
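As an illustration (not part of the commit), a minimal sketch of how a caller might drain a synchronously initialized NMClient using this API: release the client, then iterate the main context until the context-busy-watcher is destroyed, which signals that all pending requests completed:
```c
#include <NetworkManager.h>

static void
watcher_gone(gpointer user_data, GObject *where_the_object_was)
{
    /* Invoked once all pending requests completed and all resources
     * of the (internal) GMainContext were reclaimed. */
    *((gboolean *) user_data) = TRUE;
}

int
main(void)
{
    gboolean  done   = FALSE;
    NMClient *client = nm_client_new(NULL, NULL);

    if (!client)
        return 1;

    /* The busy watcher can outlive the client while requests are pending. */
    g_object_weak_ref(nm_client_get_context_busy_watcher(client), watcher_gone, &done);
    g_object_unref(client);

    /* Keep iterating the caller's context until everything is reclaimed. */
    while (!done)
        g_main_context_iteration(g_main_context_default(), TRUE);
    return 0;
}
```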
/* We have an inner context. Note that if we lose the name owner, we have a chance
 * to resync and drop the inner context. That means, requests made against the inner
 * context have a different lifetime. Hence, we create a separate tracking
 * object. This "wraps" the outer context-busy-watcher and references it, so
 * that they work together. Grep for nm_context_busy_watcher_quark() to
 * see how this works. */
parent_context_busy_watcher = g_steal_pointer(&priv->context_busy_watcher);
priv->context_busy_watcher  = g_object_new(G_TYPE_OBJECT, NULL);
g_object_set_qdata_full(priv->context_busy_watcher,
                        nm_context_busy_watcher_quark(),
                        parent_context_busy_watcher,
                        g_object_unref);
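The qdata key is a lazily-registered GQuark. As a point of reference (an assumption, not the commit's code; libnm's actual definition may use its own caching macro), such an accessor can be defined with GLib's stock helper:
```c
#include <glib-object.h>

/* Defines GQuark nm_context_busy_watcher_quark(void), registering the
 * "nm-context-busy-watcher" string once and returning the cached quark
 * on subsequent calls. */
G_DEFINE_QUARK(nm-context-busy-watcher, nm_context_busy_watcher)
```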
g_main_context_push_thread_default(dbus_context);
main_loop = g_main_loop_new(dbus_context, FALSE);
priv->init_data = nml_init_data_new_sync(cancellable, main_loop, &local_error);
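Taken together, these lines implement the sync-over-async pattern described in the comment above: dispatch the regular async code on a private GMainContext and block only for the overall result. A self-contained sketch of the same pattern in plain GIO (illustrative only; g_bus_get() stands in for NMClient's real initialization):
```c
#include <gio/gio.h>

typedef struct {
    GMainLoop       *main_loop;
    GDBusConnection *connection;
    GError          *error;
} SyncCtx;

static void
got_bus(GObject *source, GAsyncResult *result, gpointer user_data)
{
    SyncCtx *ctx = user_data;

    ctx->connection = g_bus_get_finish(result, &ctx->error);
    g_main_loop_quit(ctx->main_loop);
}

static GDBusConnection *
bus_get_sync_over_async(GError **error)
{
    GMainContext *dbus_context = g_main_context_new();
    SyncCtx       ctx          = {0};

    /* All async callbacks are dispatched on the private dbus_context,
     * never on the caller's (possibly iterating) main context. */
    g_main_context_push_thread_default(dbus_context);
    ctx.main_loop = g_main_loop_new(dbus_context, FALSE);

    g_bus_get(G_BUS_TYPE_SYSTEM, NULL, got_bus, &ctx);

    /* Block for the overall result only; the async implementation is
     * reused unchanged. */
    g_main_loop_run(ctx.main_loop);

    g_main_loop_unref(ctx.main_loop);
    g_main_context_pop_thread_default(dbus_context);
    g_main_context_unref(dbus_context);

    if (ctx.error)
        g_propagate_error(error, ctx.error);
    return ctx.connection;
}
```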
|
    _init_start(self);

    g_main_loop_run(main_loop);

    g_main_loop_unref(main_loop);

    g_main_context_pop_thread_default(dbus_context);
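
    /* The caller's context differs from the internal D-Bus context:
     * integrate the context-busy-watcher into the caller's context so
     * the D-Bus context is still serviced while work is pending. */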
    if (priv->main_context != priv->dbus_context) {
        nm_context_busy_watcher_integrate_source(priv->main_context,
                                                 priv->dbus_context,
                                                 priv->context_busy_watcher);
    }
    g_main_context_unref(dbus_context);
    if (local_error) {
        g_propagate_error(error, local_error);
        return FALSE;
    }

    return TRUE;
}
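
A minimal usage sketch (not part of this file; it assumes only the public
libnm entry points `nm_client_new()` and `nm_client_get_version()`) showing
how a caller reaches the synchronous path above:

```c
/* Sketch: synchronous NMClient creation via the public API.
 * nm_client_new() initializes through GInitable, i.e. the init_sync()
 * path shown above. */
#include <NetworkManager.h>

int
main(void)
{
    GError   *error  = NULL;
    NMClient *client = nm_client_new(NULL /* cancellable */, &error);

    if (!client) {
        g_printerr("could not create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }
    g_print("NetworkManager version: %s\n", nm_client_get_version(client));
    g_object_unref(client);
    return 0;
}
```

The asynchronous counterparts, nm_client_new_async() and
nm_client_new_finish(), instead take the init_async() path that begins
below.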
/*****************************************************************************/
static void
init_async(GAsyncInitable *initable,
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
           int                 io_priority,
           GCancellable       *cancellable,
           GAsyncReadyCallback callback,
           gpointer            user_data)
{
    NMClientPrivate *priv;
    NMClient        *self;
    nm_auto_pop_gmaincontext GMainContext *context = NULL;
    GTask                                 *task;

    g_return_if_fail(NM_IS_CLIENT(initable));

    self = NM_CLIENT(initable);
    priv = NM_CLIENT_GET_PRIVATE(self);

    g_return_if_fail(!priv->dbus_context);

    /* Pin all D-Bus event processing to the GMainContext that was current
     * at construction time. */
    priv->dbus_context = g_main_context_ref(priv->main_context);

    context = nm_g_main_context_push_thread_default_if_necessary(priv->main_context);

    task = nm_g_task_new(self, cancellable, init_async, callback, user_data);
    g_task_set_priority(task, io_priority);

    priv->init_data = nml_init_data_new_async(cancellable, g_steal_pointer(&task));
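This async init path is what runs underneath the public
nm_client_new_async()/nm_client_new_finish() entry points. For reference, a
minimal caller-side sketch of consuming it (an illustration, not code from
this file):
```c
#include <NetworkManager.h>

static void
got_client_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GMainLoop *loop  = user_data;
    GError    *error = NULL;
    NMClient  *client;

    client = nm_client_new_finish(result, &error);
    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_clear_error(&error);
    } else {
        /* By now the object cache is fully populated and consistent. */
        const char *version = nm_client_get_version(client);

        g_print("NetworkManager version: %s\n", version ? version : "unknown");
        g_object_unref(client);
    }
    g_main_loop_quit(loop);
}

int
main(void)
{
    GMainLoop *loop = g_main_loop_new(NULL, FALSE);

    /* The callback is dispatched while iterating the GMainContext that was
     * thread-default when nm_client_new_async() was called. */
    nm_client_new_async(NULL, got_client_cb, loop);
    g_main_loop_run(loop);
    g_main_loop_free(loop);
    return 0;
}
```
Build it against libnm via pkg-config, e.g.
`cc example.c $(pkg-config --cflags --libs libnm)`.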
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
|
|
|
|
|
_init_start(self);
|
2019-10-11 11:58:09 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static gboolean
|
|
|
|
|
init_finish(GAsyncInitable *initable, GAsyncResult *result, GError **error)
|
|
|
|
|
{
|
2019-10-30 11:42:58 +01:00
|
|
|
g_return_val_if_fail(NM_IS_CLIENT(initable), FALSE);
|
|
|
|
|
g_return_val_if_fail(nm_g_task_is_valid(result, initable, init_async), FALSE);
|
2019-10-11 11:58:09 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
return g_task_propagate_boolean(G_TASK(result), error);
|
2019-10-11 11:58:09 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*****************************************************************************/
|
|
|
|
|
|
|
|
|
|
static void
|
2019-10-12 14:17:16 +02:00
|
|
|
nm_client_init(NMClient *self)
|
2019-10-11 11:58:09 +02:00
|
|
|
{
|
2019-10-30 11:42:58 +01:00
|
|
|
NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
|
|
|
|
|
|
2019-12-05 15:53:51 +01:00
|
|
|
priv->permissions_state = NM_TERNARY_DEFAULT;
|
|
|
|
|
|
libnm: fix leaking internal GMainContext for synchronously initialized NMClient
NMClient makes asynchronous D-Bus calls via g_dbus_connection_call().
This references the current GMainContext to later invoke the
asynchronous callback. Even when we cancel the asynchronous call,
the callback will still be invoked (later) to complete the request.
In particular this means when we destroy (unref) an NMClient, there
are quite possibly pending requests in the GMainContext. Although they
are cancelled, they keep the GMainContext alive.
With synchronous initialization, we have an internal GMainContext.
When we destroy the NMClient, we cannot just unhook the integrated
source; instead, we need to keep it integrated in the caller's main
context as long as there are pending requests.
Add a mechanism to track those pending requests and fix the leak for the
internal GMainContext. Also expose the same mechanism to the user via a new
API called nm_client_get_context_busy_watcher(). This allows the user
to know when it can stop iterating the main context and when all
resources are reclaimed.
For example, the following will lead to a crash:
for i in range(1, 2000):
    nmc = NM.Client.new(None)
This creates a number of NMClient instances and destroys them again.
Note that here the GMainContext is never iterated, because
synchronous initialization does not iterate the caller's context. So,
while we correctly unref and dispose the created NMClient instances,
there are pending requests left in the inner GMainContext. These pile
up and soon the program will crash because it runs out of file descriptors.
We can have a similar problem with asynchronous initialization, when
we create a new GMainContext per client, and don't iterate it after
we are done with the client.
Note that this patch does not avoid the problem in general. The problem
cannot be avoided: the user must iterate the main context at some point.
Otherwise, resources (memory and file descriptors) will be leaked.
Fixes: ce0e898fb476 ('libnm: refactor caching of D-Bus objects in NMClient')
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/merge_requests/347
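A minimal sketch (not part of this commit message) of how a caller might use
the new nm_client_get_context_busy_watcher() API: register a weak pointer on
the watcher, drop the client, and keep iterating the main context until the
watcher is destroyed. This assumes the client was created on the default
main context (no thread-default context pushed).
```
/* Sketch: wait until all pending requests of a destroyed NMClient are
 * reclaimed, by watching the context-busy-watcher with a weak pointer. */
#include <NetworkManager.h>

int
main(void)
{
    GError   *error = NULL;
    NMClient *client;
    gpointer  watcher;

    client = nm_client_new(NULL, &error);
    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    /* Take a weak pointer on the busy watcher before dropping the client. */
    watcher = nm_client_get_context_busy_watcher(client);
    g_object_add_weak_pointer(G_OBJECT(watcher), &watcher);

    g_object_unref(client);

    /* Cancelled/pending D-Bus calls keep the watcher (and their sources in
     * the main context) alive; iterate until everything is reclaimed. */
    while (watcher)
        g_main_context_iteration(NULL, TRUE);

    return 0;
}
```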
2019-11-26 00:23:41 +01:00
|
|
|
priv->context_busy_watcher = g_object_new(G_TYPE_OBJECT, NULL);
|
|
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also, before, we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data are static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
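To make the cache-consistency and signal-ordering points above concrete, here is
a minimal sketch (an editor's illustration, not part of the commit; it assumes
only public libnm API: nm_client_new(), nm_client_get_devices(),
nm_device_get_iface() and the NMClient "device-added" signal):
```
#include <NetworkManager.h>

static void
on_device_added(NMClient *client, NMDevice *device, gpointer user_data)
{
    /* Signals are emitted only after the whole cache was updated:
     * nm_client_get_devices() already contains @device here. */
    g_print("device added: %s (%u devices total)\n",
            nm_device_get_iface(device),
            nm_client_get_devices(client)->len);
}

int
main(void)
{
    GError    *error  = NULL;
    NMClient  *client = nm_client_new(NULL, &error);
    GMainLoop *loop;

    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    /* For device changes, "device-removed" is emitted first, then
     * "notify::devices", and finally "device-added" (the order
     * described above). */
    g_signal_connect(client, "device-added", G_CALLBACK(on_device_added), NULL);

    loop = g_main_loop_new(NULL, FALSE);
    g_main_loop_run(loop);
    return 0;
}
```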
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so the absolute numbers don't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    c_list_init(&self->obj_base.queue_notify_lst);
    c_list_init(&priv->queue_notify_lst_head);
    c_list_init(&priv->notify_event_lst_head);

    priv->dbus_objects = g_hash_table_new(nm_pdirect_hash, nm_pdirect_equal);
    c_list_init(&priv->dbus_objects_lst_head_watched_only);
    c_list_init(&priv->dbus_objects_lst_head_on_dbus);
    c_list_init(&priv->dbus_objects_lst_head_with_nmobj_not_ready);
    c_list_init(&priv->dbus_objects_lst_head_with_nmobj_ready);
    c_list_init(&priv->obj_changed_lst_head);
}

/**
 * nm_client_new:
 * @cancellable: a #GCancellable, or %NULL
 * @error: location for a #GError, or %NULL
 *
 * Creates a new #NMClient synchronously.
 *
 * Note that this will block until an NMClient instance is fully initialized.
 * This does nothing besides calling g_initable_new(). You are free to call
 * g_initable_new() or g_object_new()/g_initable_init() directly for more
 * control, to set GObject properties or get access to the NMClient instance
 * while it is still initializing.
 *
 * Using the synchronous initialization creates an #NMClient instance
 * that uses an internal #GMainContext. This context is invisible to the
 * user. This introduces an additional overhead that is paid not
 * only during object initialization, but for the entire lifetime of
 * this object.
 * Also, due to this internal #GMainContext, the events are no longer
 * in sync with other messages from #GDBusConnection (but all events
 * of the NMClient will themselves still be ordered).
 * For a serious program, you should therefore avoid these problems by
 * using g_async_initable_init_async() or nm_client_new_async() instead.
 * The sync initialization is still useful for simple scripts or interactive
 * testing, for example via pygobject.
 *
 * Creating an #NMClient instance can only fail for two reasons. First, if you didn't
 * provide a %NM_CLIENT_DBUS_CONNECTION and the call to g_bus_get()
 * fails. You can avoid that by using g_initable_new() directly and
 * setting a D-Bus connection.
 * Second, if you cancelled the creation. If you do that, then note
 * that after the failure there might still be idle actions pending
 * which keep nm_client_get_main_context() alive. That means,
 * in that case you must continue iterating the context to avoid
 * leaks. See nm_client_get_context_busy_watcher().
 *
 * Creating an #NMClient instance when NetworkManager is not running
 * does not cause a failure.
 *
 * Returns: a new #NMClient or %NULL on an error
 **/
NMClient *
nm_client_new(GCancellable *cancellable, GError **error)
{
    return g_initable_new(NM_TYPE_CLIENT, cancellable, error, NULL);
}
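As a usage note, a hypothetical caller of this synchronous entry point could
look like the following sketch (an editor's illustration, not part of
nm-client.c; it assumes only the public API nm_client_new(),
nm_client_get_devices() and nm_device_get_iface()):
```
#include <NetworkManager.h>

/* Blocks in nm_client_new() until the client cache is fully populated. */
int
main(void)
{
    GError          *error = NULL;
    NMClient        *client;
    const GPtrArray *devices;
    guint            i;

    client = nm_client_new(NULL /* cancellable */, &error);
    if (!client) {
        g_printerr("could not create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    devices = nm_client_get_devices(client);
    for (i = 0; i < devices->len; i++)
        g_print("device: %s\n", nm_device_get_iface(devices->pdata[i]));

    g_object_unref(client);
    return 0;
}
```
The trade-off described in the doc comment above applies: this is convenient
for scripts, but the internal #GMainContext overhead lasts for the client's
whole lifetime.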
/**
 * nm_client_new_async:
 * @cancellable: a #GCancellable, or %NULL
 * @callback: callback to call when the client is created
 * @user_data: data for @callback
 *
 * Creates a new #NMClient asynchronously.
 * @callback will be called when it is done. Use
 * nm_client_new_finish() to get the result.
 *
 * This does nothing besides calling g_async_initable_new_async(). You are free to
 * call g_async_initable_new_async() or g_object_new()/g_async_initable_init_async()
 * directly for more control, to set GObject properties or get access to the NMClient
 * instance while it is still initializing.
 *
 * Creating an #NMClient instance can only fail for two reasons. First, if you didn't
 * provide a %NM_CLIENT_DBUS_CONNECTION and the call to g_bus_get()
 * fails. You can avoid that by using g_async_initable_new_async() directly and
 * setting a D-Bus connection.
 * Second, if you cancelled the creation. If you do that, then note
 * that after the failure there might still be idle actions pending
 * which keep nm_client_get_main_context() alive. That means,
 * in that case you must continue iterating the context to avoid
 * leaks. See nm_client_get_context_busy_watcher().
 *
 * Creating an #NMClient instance when NetworkManager is not running
 * does not cause a failure.
 **/
void
nm_client_new_async(GCancellable *cancellable, GAsyncReadyCallback callback, gpointer user_data)
{
    g_async_initable_new_async(NM_TYPE_CLIENT,
                               G_PRIORITY_DEFAULT,
                               cancellable,
                               callback,
                               user_data,
                               NULL);
}
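Correspondingly, here is a sketch of the asynchronous path driven from a
private, caller-owned #GMainContext (again an editor's illustration under the
contract stated in the commit message above; only public GLib and libnm calls
are used):
```
#include <NetworkManager.h>

static void
client_ready(GObject *source_object, GAsyncResult *result, gpointer user_data)
{
    GMainLoop *loop  = user_data;
    GError    *error = NULL;
    NMClient  *client;

    client = nm_client_new_finish(result, &error);
    if (!client) {
        g_printerr("failed to create NMClient: %s\n", error->message);
        g_clear_error(&error);
    }
    g_clear_object(&client);
    g_main_loop_quit(loop);
}

int
main(void)
{
    GMainContext *context = g_main_context_new();
    GMainLoop    *loop;

    /* NMClient binds to the thread-default context at construction time;
     * its callbacks and signals then fire only while this context is
     * iterated, never from the global default context. */
    g_main_context_push_thread_default(context);
    loop = g_main_loop_new(context, FALSE);

    nm_client_new_async(NULL, client_ready, loop);
    g_main_loop_run(loop);

    g_main_context_pop_thread_default(context);
    g_main_loop_unref(loop);
    g_main_context_unref(context);
    return 0;
}
```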
/**
 * nm_client_new_finish:
 * @result: a #GAsyncResult
 * @error: location for a #GError, or %NULL
 *
 * Gets the result of an nm_client_new_async() call.
 *
 * Returns: a new #NMClient, or %NULL on error
 **/
NMClient *
nm_client_new_finish(GAsyncResult *result, GError **error)
{
    gs_unref_object GObject *source_object = NULL;
    GObject                 *object;

    source_object = g_async_result_get_source_object(result);
    g_return_val_if_fail(source_object, NULL);

    object = g_async_initable_new_finish(G_ASYNC_INITABLE(source_object), result, error);
    g_return_val_if_fail(!object || NM_IS_CLIENT(object), NULL);

    return NM_CLIENT(object);
}

static void
constructed(GObject *object)
{
    NMClient        *self = NM_CLIENT(object);
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);

    priv->main_context = g_main_context_ref_thread_default();

    G_OBJECT_CLASS(nm_client_parent_class)->constructed(object);

    NML_NMCLIENT_LOG_D(self, "new NMClient instance");
}

static void
dispose(GObject *object)
{
    NMClient *self = NM_CLIENT(object);
    NMClientPrivate *priv = NM_CLIENT_GET_PRIVATE(self);
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
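# Average 50 plain nmcli invocations with perf; the output is discarded.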
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
nm_assert(!priv->init_data);
|
2019-10-11 11:58:09 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
self->obj_base.is_disposing = TRUE;
|
2019-10-11 11:58:09 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
nm_clear_g_cancellable(&priv->name_owner_get_cancellable);
|
2019-10-11 11:58:09 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and setup a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
nm_clear_g_dbus_connection_signal(priv->dbus_connection, &priv->name_owner_changed_id);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. (There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.)
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_setup
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
nm_clear_g_free(&priv->name_owner);
|
2019-10-30 11:42:58 +01:00
|
|
|
_init_release_all(self);
|
2019-10-30 11:42:58 +01:00
|
|
|
nm_assert(c_list_is_empty(&priv->dbus_objects_lst_head_watched_only));
|
|
|
|
|
nm_assert(c_list_is_empty(&priv->dbus_objects_lst_head_on_dbus));
|
|
|
|
|
nm_assert(c_list_is_empty(&priv->dbus_objects_lst_head_with_nmobj_not_ready));
|
|
|
|
|
nm_assert(c_list_is_empty(&priv->dbus_objects_lst_head_with_nmobj_ready));
|
2019-10-30 11:42:58 +01:00
|
|
|
nm_assert(c_list_is_empty(&priv->queue_notify_lst_head));
|
|
|
|
|
nm_assert(c_list_is_empty(&priv->notify_event_lst_head));
|
|
|
|
|
nm_assert(c_list_is_empty(&self->obj_base.queue_notify_lst));
|
2020-12-08 15:41:46 +01:00
|
|
|
nm_assert(nm_g_hash_table_size(priv->dbus_objects) == 0);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Previously, we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other metadata are static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them because it won't compile.
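The idea can be illustrated with a hypothetical macro (the real
NML_DBUS_META_PROPERTY_INIT_U() is more elaborate); here a mere size
mismatch between the struct field and guint32 breaks the build:
```
#include <glib.h>

typedef struct {
    const char *dbus_type; /* e.g. "u" for a D-Bus uint32 */
    gsize       offset;
} SketchMetaProperty;

/* Evaluates to 0 when the sizes match; otherwise char[-1] is a
 * compile error. A crude stand-in for a full type check. */
#define SKETCH_CHECK_U(structtype, field) \
    (sizeof(char[sizeof(((structtype *) NULL)->field) == sizeof(guint32) ? 1 : -1]) - 1)

#define SKETCH_PROPERTY_INIT_U(structtype, field)         \
    {                                                     \
        .dbus_type = "u",                                 \
        .offset    = G_STRUCT_OFFSET(structtype, field)   \
                     + SKETCH_CHECK_U(structtype, field), \
    }

typedef struct {
    guint32 version_id;
} SketchObject;

static const SketchMetaProperty sketch_properties[] = {
    SKETCH_PROPERTY_INIT_U(SketchObject, version_id), /* matches: compiles */
    /* the same initializer on a field whose size differs from guint32
     * would fail to compile */
};
```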
- The metadata now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, they prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). They are therefore not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
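For example, a test program could opt in before creating the client (a
sketch; it assumes libnm reads the variable lazily, and
G_DEBUG=fatal-warnings itself must come from the process environment):
```
#include <glib.h>
#include <NetworkManager.h>

int
main(void)
{
    g_autoptr(GError) error = NULL;
    NMClient         *client;

    /* opt in to client-side warnings before libnm is first used */
    g_setenv("LIBNM_CLIENT_DEBUG", "warning,error", TRUE);

    client = nm_client_new(NULL, &error);
    if (!client) {
        g_printerr("cannot create NMClient: %s\n", error->message);
        return 1;
    }
    g_object_unref(client);
    return 0;
}
```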
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
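A hypothetical observer illustrating the documented order; the callbacks
simply log which signal fired:
```
#include <NetworkManager.h>

static void
on_device_added(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("device-added: %s\n", nm_device_get_iface(device));
}

static void
on_device_removed(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("device-removed: %s\n", nm_device_get_iface(device));
}

static void
on_notify_devices(GObject *object, GParamSpec *pspec, gpointer user_data)
{
    g_print("notify::devices\n");
}

static void
sketch_watch_devices(NMClient *client)
{
    g_signal_connect(client, "device-added", G_CALLBACK(on_device_added), NULL);
    g_signal_connect(client, "device-removed", G_CALLBACK(on_device_removed), NULL);
    g_signal_connect(client, "notify::devices", G_CALLBACK(on_notify_devices), NULL);
}
```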
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
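A short sketch of the visible side of this: iterating what
nm_client_get_connections() exposes (hidden profiles never appear here):
```
#include <NetworkManager.h>

static void
sketch_print_visible_connections(NMClient *client)
{
    const GPtrArray *connections = nm_client_get_connections(client);
    guint            i;

    for (i = 0; i < connections->len; i++) {
        NMRemoteConnection *remote = g_ptr_array_index(connections, i);

        /* hidden profiles are filtered out by libnm and never show up */
        g_print("visible profile: %s\n",
                nm_connection_get_id(NM_CONNECTION(remote)));
    }
}
```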
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_setup
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
nml_dbus_property_o_clear_many(priv->nm.property_o, G_N_ELEMENTS(priv->nm.property_o), NULL);
|
|
|
|
|
nml_dbus_property_ao_clear_many(priv->nm.property_ao, G_N_ELEMENTS(priv->nm.property_ao), NULL);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
nm_clear_g_free(&priv->nm.connectivity_check_uri);
|
|
|
|
|
nm_clear_g_free(&priv->nm.version);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
nml_dbus_property_ao_clear(&priv->settings.connections, NULL);
|
|
|
|
|
nm_clear_g_free(&priv->settings.hostname);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-10-30 11:42:58 +01:00
|
|
|
nm_clear_pointer(&priv->dns_manager.configuration, g_ptr_array_unref);
|
|
|
|
|
nm_clear_g_free(&priv->dns_manager.mode);
|
|
|
|
|
nm_clear_g_free(&priv->dns_manager.rc_manager);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to an NMClient happens through the caller's
GMainContext. This follows glib's style and effectively allows using NMClient
in a multi-threaded scenario. This implies sticking to a main context
upon construction and ensuring that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context (see the first sketch after this list).
- Get the ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them from an
idle handler. That is wrong; signals must be emitted in the right order,
at the moment they happen. Note that when using GInitable's synchronous
initialization to initialize the NMClient instance, NMClient internally still
operates fully asynchronously. In that case NMClient has an internal main
context (see the second sketch after this list).
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code around does not make it
easier to understand, on the contrary. That means NMClient is
inherently complex, as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that, for example, during the initial
GetManagedObjects() reply, NMClient receives a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted at a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties have changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from the client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should all be for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Previously, we also constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other metadata are static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them, because mistakes won't compile (see the third sketch after this list).
- The metadata now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That only matters when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type; it didn't know the expected D-Bus type.
- Previously, GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). They are therefore not ordinary assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit
"device-removed" first, then "notify:devices", and finally "device-added"
signals.
This changes behavior relative to commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be made invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and from the "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved.
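The three sketches below illustrate some of the points above. First, the
GMainContext rule: a minimal sketch (not libnm code; schedule_on_context and
my_callback are made-up names) of scheduling an idle callback on an explicitly
chosen context instead of via g_idle_add():
```
#include <glib.h>

static gboolean
my_callback(gpointer user_data)
{
    /* ... emit signals, invoke user callbacks, etc. ... */
    return G_SOURCE_REMOVE;
}

static void
schedule_on_context(GMainContext *context, gpointer user_data)
{
    GSource *source;

    /* g_idle_add() would attach the source to g_main_context_default().
     * Instead, create the source by hand and attach it to the chosen
     * context. */
    source = g_idle_source_new();
    g_source_set_callback(source, my_callback, user_data, NULL);
    g_source_attach(source, context);
    g_source_unref(source);
}
```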
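Second, the synchronous initialization, seen from the caller's side. This is a
plain usage sketch against public libnm API, with minimal error handling;
compile flags from `pkg-config --cflags --libs libnm` are assumed:
```
#include <NetworkManager.h>

int
main(void)
{
    GError     *error = NULL;
    NMClient   *client;
    const char *version;

    /* Synchronous creation: blocks until the initial state is loaded,
     * while NMClient internally still operates asynchronously on its
     * own main context. */
    client = nm_client_new(NULL, &error);
    if (!client) {
        g_printerr("could not create NMClient: %s\n", error->message);
        g_error_free(error);
        return 1;
    }

    version = nm_client_get_version(client);
    g_print("NetworkManager version: %s\n", version ? version : "(unknown)");
    g_object_unref(client);
    return 0;
}
```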
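Third, the compile-time checks. The sketch below shows the general C trick such
macros can build on; the names ExampleMetaProperty and
EXAMPLE_META_PROPERTY_INIT_U() are made up, and the real
NML_DBUS_META_PROPERTY_INIT_U() performs stricter checks:
```
#include <glib.h>

typedef struct {
    const char *dbus_type;
    gsize       offset;
} ExampleMetaProperty;

/* The sizeof(char[... ? 1 : -1]) expression has a negative array size,
 * and therefore fails to compile, whenever the member does not have the
 * size of a guint32 (a rough stand-in for the real, stricter checks). */
#define EXAMPLE_META_PROPERTY_INIT_U(structure, member)                      \
    {                                                                        \
        .dbus_type = "u",                                                    \
        .offset    = G_STRUCT_OFFSET(structure, member)                      \
                     + 0 * sizeof(char[sizeof(((structure *) NULL)->member)  \
                                       == sizeof(guint32) ? 1 : -1]),        \
    }

typedef struct {
    guint32 capabilities_len;
} ExampleObject;

/* Compiles; changing capabilities_len to, say, guint8 would not. */
static const ExampleMetaProperty example_prop G_GNUC_UNUSED =
    EXAMPLE_META_PROPERTY_INIT_U(ExampleObject, capabilities_len);
```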
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that the RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With a checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long a plain `nmcli` call takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
    /* drop the cache of all D-Bus objects */
    nm_clear_pointer(&priv->dbus_objects, g_hash_table_destroy);
    /* chain up to the parent class' dispose() */
    G_OBJECT_CLASS(nm_client_parent_class)->dispose(object);
    /* release the udev handle */
    nm_clear_pointer(&priv->udev, udev_unref);
    /* Tail of NMClient's dispose(): tear down the D-Bus related state.
     * The context-busy-watcher must not be dropped synchronously; it is
     * handed to an idle handler on the D-Bus context, so it is released
     * from within that context after already-queued events dispatched. */
    if (priv->context_busy_watcher && priv->dbus_context) {
        nml_cleanup_context_busy_watcher_on_idle(g_steal_pointer(&priv->context_busy_watcher),
                                                 priv->dbus_context);
    }

    nm_clear_pointer(&priv->dbus_context, g_main_context_unref);
    nm_clear_pointer(&priv->main_context, g_main_context_unref);

    nm_clear_g_free(&priv->permissions);

    g_clear_object(&priv->dbus_connection);

    g_clear_object(&priv->context_busy_watcher);

    nm_clear_g_free(&priv->name_owner);

    priv->nm.capabilities_len = 0;
    nm_clear_g_free(&priv->nm.capabilities_arr);

    NML_NMCLIENT_LOG_D(self, "disposed");
}
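A minimal caller-side sketch of what this deferred cleanup implies (illustrative only, not part of nm-client.c; it assumes the client was created while `context` was the thread-default GMainContext): after dropping the last NMClient reference, keep iterating the context until the object returned by nm_client_get_context_busy_watcher() is destroyed, since, as dispose() above shows, the final cleanup runs from an idle source.
```
#include <NetworkManager.h>

static void
watcher_gone_cb(gpointer data, GObject *where_the_object_was)
{
    /* Called when libnm's context-busy-watcher is finalized. */
    *((gboolean *) data) = TRUE;
}

static void
teardown_client(NMClient *client, GMainContext *context)
{
    gboolean gone = FALSE;

    g_object_weak_ref(nm_client_get_context_busy_watcher(client),
                      watcher_gone_cb,
                      &gone);
    g_object_unref(client);

    /* Drain libnm's remaining sources; the watcher is released from an
     * idle handler once the client is fully torn down. */
    while (!gone)
        g_main_context_iteration(context, TRUE);
}
```
Only after teardown_client() returns is it safe to stop iterating (or to destroy) the context that the client was using.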

/* Static, immutable D-Bus interface meta data. The AgentManager interface
 * carries no properties and no client-side GObject type. */
const NMLDBusMetaIface _nml_dbus_meta_iface_nm_agentmanager =
    NML_DBUS_META_IFACE_INIT(NM_DBUS_INTERFACE_AGENT_MANAGER,
                             NULL,
                             NML_DBUS_META_INTERFACE_PRIO_NONE, );
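These meta data tables are static and immutable, and the commit message notes that initializer macros such as NML_DBUS_META_PROPERTY_INIT_U() type-check their arguments at compile time. A hypothetical, stripped-down sketch of that technique (plain GLib C, not libnm's actual macros):
```
#include <glib.h>

typedef struct {
    const char *dbus_name;
    gsize       offset;
} DemoMetaProperty;

/* Hypothetical macro: registering "member" for D-Bus type "u" only
 * compiles if the member really is a guint32. The pointer subtraction
 * sits inside sizeof(), so it is never evaluated; it only type-checks. */
#define DEMO_META_PROPERTY_INIT_U(name, ctype, member)                 \
    {                                                                  \
        .dbus_name = (name),                                           \
        .offset    = G_STRUCT_OFFSET(ctype, member)                    \
                     + 0u * sizeof(&(((ctype *) NULL)->member)         \
                                   - (guint32 *) NULL),                \
    }

typedef struct {
    guint32 version_id;
} DemoClient;

static const DemoMetaProperty demo_properties[] = {
    DEMO_META_PROPERTY_INIT_U("VersionId", DemoClient, version_id),
    /* Changing version_id to, say, char * makes this fail to compile. */
};
```
Because such tables are const and fully static, one copy can serve every NMClient instance without being duplicated at runtime.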
const NMLDBusMetaIface _nml_dbus_meta_iface_nm = NML_DBUS_META_IFACE_INIT_PROP(
    NM_DBUS_INTERFACE,
    nm_client_get_type,
    NML_DBUS_META_INTERFACE_PRIO_NMCLIENT,
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager can currently
handle. Of course, that is a different issue. Here we just check how
long a plain `nmcli` invocation takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_setup
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) Same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) Same setup as N4), but run `nmcli monitor` and check `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
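/* Below is the static meta-table for the org.freedesktop.NetworkManager
 * D-Bus interface. Each NML_DBUS_META_PROPERTY_INIT_*() entry binds one
 * D-Bus property to an NMClient GObject property and private field; the
 * *_O_PROP/*_AO_PROP variants resolve object paths against the object
 * cache, while *_IGNORE entries are recognized but not mapped. */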
NML_DBUS_META_IFACE_DBUS_PROPERTIES(
    NML_DBUS_META_PROPERTY_INIT_O_PROP("ActivatingConnection",
                                       PROP_ACTIVATING_CONNECTION,
                                       NMClient,
                                       _priv.nm.property_o[PROPERTY_O_IDX_NM_ACTIVATING_CONNECTION],
                                       nm_active_connection_get_type),
    NML_DBUS_META_PROPERTY_INIT_AO_PROP("ActiveConnections",
                                        PROP_ACTIVE_CONNECTIONS,
                                        NMClient,
                                        _priv.nm.property_ao[PROPERTY_AO_IDX_ACTIVE_CONNECTIONS],
                                        nm_active_connection_get_type,
                                        .notify_changed_ao = _property_ao_notify_changed_active_connections_cb),
    NML_DBUS_META_PROPERTY_INIT_AO_PROP("AllDevices",
                                        PROP_ALL_DEVICES,
                                        NMClient,
                                        _priv.nm.property_ao[PROPERTY_AO_IDX_ALL_DEVICES],
                                        nm_device_get_type,
                                        .notify_changed_ao = _property_ao_notify_changed_all_devices_cb),
    NML_DBUS_META_PROPERTY_INIT_FCN("Capabilities",
                                    PROP_CAPABILITIES,
                                    "au",
                                    _notify_update_prop_nm_capabilities, ),
    NML_DBUS_META_PROPERTY_INIT_AO_PROP("Checkpoints",
                                        PROP_CHECKPOINTS,
                                        NMClient,
                                        _priv.nm.property_ao[PROPERTY_AO_IDX_CHECKPOINTS],
                                        nm_checkpoint_get_type),
    NML_DBUS_META_PROPERTY_INIT_U("Connectivity",
                                  PROP_CONNECTIVITY,
                                  NMClient,
                                  _priv.nm.connectivity),
    NML_DBUS_META_PROPERTY_INIT_B("ConnectivityCheckAvailable",
                                  PROP_CONNECTIVITY_CHECK_AVAILABLE,
                                  NMClient,
                                  _priv.nm.connectivity_check_available),
    NML_DBUS_META_PROPERTY_INIT_B("ConnectivityCheckEnabled",
                                  PROP_CONNECTIVITY_CHECK_ENABLED,
                                  NMClient,
                                  _priv.nm.connectivity_check_enabled),
    NML_DBUS_META_PROPERTY_INIT_S("ConnectivityCheckUri",
                                  PROP_CONNECTIVITY_CHECK_URI,
                                  NMClient,
                                  _priv.nm.connectivity_check_uri),
    NML_DBUS_META_PROPERTY_INIT_AO_PROP("Devices",
                                        PROP_DEVICES,
                                        NMClient,
                                        _priv.nm.property_ao[PROPERTY_AO_IDX_DEVICES],
                                        nm_device_get_type,
                                        .notify_changed_ao = _property_ao_notify_changed_devices_cb),
    NML_DBUS_META_PROPERTY_INIT_IGNORE("GlobalDnsConfiguration", "a{sv}"),
    NML_DBUS_META_PROPERTY_INIT_U("Metered", PROP_METERED, NMClient, _priv.nm.metered),
    NML_DBUS_META_PROPERTY_INIT_B("NetworkingEnabled",
                                  PROP_NETWORKING_ENABLED,
                                  NMClient,
                                  _priv.nm.networking_enabled),
    NML_DBUS_META_PROPERTY_INIT_O_PROP("PrimaryConnection",
                                       PROP_PRIMARY_CONNECTION,
                                       NMClient,
                                       _priv.nm.property_o[PROPERTY_O_IDX_NM_PRIMAY_CONNECTION],
                                       nm_active_connection_get_type),
    NML_DBUS_META_PROPERTY_INIT_IGNORE("PrimaryConnectionType", "s"),
    NML_DBUS_META_PROPERTY_INIT_U("RadioFlags",
                                  PROP_RADIO_FLAGS,
                                  NMClient,
                                  _priv.nm.radio_flags),
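/* On the client side, the entries above surface through plain getters such
 * as nm_client_get_devices(), nm_client_get_active_connections(),
 * nm_client_get_primary_connection() and nm_client_connectivity_check_get_uri(),
 * all of which answer from this cache without blocking on D-Bus. */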
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should all be for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping the NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also, before, we constructed NMPropertiesInfo and
had a large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which says something about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying it around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile-time checks to ensure the property types match. It's pretty hard
to misuse them, because misuse won't compile. A reduced sketch of the idea follows below.
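The sketch below is a hypothetical, heavily reduced version of that idea (the
real macros check more than just the size): a compile-time assertion rejects
the macro use unless the member really matches the D-Bus "u" type.
```
#include <glib.h>

typedef struct {
    guint32 state;
} ExamplePriv;

/* Accept a member for a D-Bus "u" property only if it has the matching
 * width; a mismatch fails to compile. */
#define EXAMPLE_CHECK_PROP_U(ctype, field) \
    G_STATIC_ASSERT (sizeof (((ctype *) NULL)->field) == sizeof (guint32))

EXAMPLE_CHECK_PROP_U (ExamplePriv, state); /* compiles */
/* EXAMPLE_CHECK_PROP_U (ExamplePriv, some_64bit_field); would not. */
```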
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That only matters when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus; I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type. A sketch of such a runtime guard follows below.
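This is the kind of runtime guard that implies (get_u32_checked() is an
illustrative helper, not libnm API; the GVariant calls are real):
```
#include <glib.h>

static gboolean
get_u32_checked (GVariant *value, guint32 *out)
{
    /* Reject values whose D-Bus type is not the expected "u",
     * instead of trusting whatever the server sent. */
    if (!g_variant_is_of_type (value, G_VARIANT_TYPE_UINT32))
        return FALSE;
    *out = g_variant_get_uint32 (value);
    return TRUE;
}
```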
- Previously, GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm, and they are not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles are hidden from NMClient's
nm_client_get_connections() and its "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still expose such
hidden NMRemoteConnection instances. This behavior was preserved; see the sketch below.
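A small sketch of that preserved behavior (show_visibility() is illustrative;
the libnm getters used are real API):
```
#include <NetworkManager.h>

static void
show_visibility (NMClient *client, NMActiveConnection *ac)
{
    /* Profiles hidden via "connection.permissions" are filtered out here... */
    const GPtrArray    *visible = nm_client_get_connections (client);
    /* ...but may still be reachable through an active connection. */
    NMRemoteConnection *rc = nm_active_connection_get_connection (ac);

    g_print ("%u visible profiles; active profile: %s\n",
             visible->len,
             rc ? nm_connection_get_id (NM_CONNECTION (rc)) : "(none)");
}
```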
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were built with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be an issue
on Fedora 31 generating wrong ELF notes. Usually libnm is smaller, but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter.
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return 1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
NML_DBUS_META_PROPERTY_INIT_B("Startup", PROP_STARTUP, NMClient, _priv.nm.startup),
NML_DBUS_META_PROPERTY_INIT_U("State", PROP_STATE, NMClient, _priv.nm.state),
NML_DBUS_META_PROPERTY_INIT_S("Version", PROP_VERSION, NMClient, _priv.nm.version),
core: add "VersionInfo" property on D-Bus and NMClient
This exposes NM_VERSION as a number (contrary to "Version", which is a
string). That is particularly useful because the encoding lets versions
be compared with plain <>/>= operators.
While at it, don't make it a single number. Expose an array of numbers,
where the subsequent numbers are a bitfield of capabilities.
Note that before commit 3c67a1ec5e8e ('cli: remove version check against
NM'), we used to parse the "Version" string to detect the version. As
such, the information that "VersionInfo" exposes now was already
(somewhat) available, you just had to parse the string. The main benefit of
"VersionInfo" is that it can expose capabilities (patched behavior) in
a lightweight bitfield. Including the numerical version there is
just useful on top. A sketch of consuming the array follows below.
Currently no additional capabilities are exposed. The idea is of course
to have a place in the future where we can expose additional
capabilities. Adding a capability flag is most useful for behavior that we
backport to older branches. Otherwise, we could just check the daemon version
alone. But since we only add the "VersionInfo" property now, we cannot backport
any capability further than this, because the "VersionInfo" property itself
won't be backported. As such, this will only be useful in the future, by having
a place where we can add (and backport) capabilities.
Note that there is some overlap with the existing "Capability" property
and the NMCapability enum. The difference is that adding a capability via "VersionInfo"
is only one bit, and thus cheaper. Most importantly, having it cheaper means
the downsides of adding a capability flag are significantly reduced. In
practice, we could live without capabilities for a long time, so they
must be very cheap to be worth adding. Another difference might be
that we will want VersionInfo to be about compile-time defaults (e.g.
a certain patch/behavior being in or not), while NM_CAPABILITY_TEAM depends on
whether the team plugin is loaded at runtime.
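To make the "compared with <>" point concrete, a hedged sketch of consuming
such an "au" array; the helper name and the way the array is obtained from
NMClient are assumptions, while NM_ENCODE_VERSION() is the real macro from
nm-version-macros.h:
```
#include <NetworkManager.h>

/* Element 0 is assumed to hold the encoded daemon version (same scheme as
 * NM_ENCODE_VERSION()), later elements capability bitfields. */
static gboolean
daemon_is_at_least_1_42 (const guint32 *version_info, gsize len)
{
    if (len < 1)
        return FALSE;
    /* Encoded versions compare correctly with plain integer operators. */
    return version_info[0] >= NM_ENCODE_VERSION (1, 42, 0);
}
```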
2022-12-13 12:25:04 +01:00
NML_DBUS_META_PROPERTY_INIT_FCN("VersionInfo",
                                PROP_VERSION_INFO,
                                "au",
                                _notify_update_prop_nm_version_info, ),
2019-10-30 11:42:58 +01:00
NML_DBUS_META_PROPERTY_INIT_IGNORE("WimaxEnabled", "b"),
NML_DBUS_META_PROPERTY_INIT_IGNORE("WimaxHardwareEnabled", "b"),
NML_DBUS_META_PROPERTY_INIT_B("WirelessEnabled",
                              PROP_WIRELESS_ENABLED,
                              NMClient,
                              _priv.nm.wireless_enabled),
NML_DBUS_META_PROPERTY_INIT_B("WirelessHardwareEnabled",
                              PROP_WIRELESS_HARDWARE_ENABLED,
                              NMClient,
                              _priv.nm.wireless_hardware_enabled),
NML_DBUS_META_PROPERTY_INIT_B("WwanEnabled",
                              PROP_WWAN_ENABLED,
                              NMClient,
                              _priv.nm.wwan_enabled),
NML_DBUS_META_PROPERTY_INIT_B("WwanHardwareEnabled",
                              PROP_WWAN_HARDWARE_ENABLED,
                              NMClient,
                              _priv.nm.wwan_hardware_enabled), ), );
2020-09-28 16:03:33 +02:00
2019-10-30 11:42:58 +01:00
const NMLDBusMetaIface _nml_dbus_meta_iface_nm_settings = NML_DBUS_META_IFACE_INIT_PROP(
    NM_DBUS_INTERFACE_SETTINGS,
    nm_client_get_type,
    NML_DBUS_META_INTERFACE_PRIO_NMCLIENT,
    NML_DBUS_META_IFACE_DBUS_PROPERTIES(
        NML_DBUS_META_PROPERTY_INIT_B("CanModify",
                                      PROP_CAN_MODIFY,
                                      NMClient,
                                      _priv.settings.can_modify),
        NML_DBUS_META_PROPERTY_INIT_AO_PROP(
            "Connections",
            PROP_CONNECTIONS,
            NMClient,
            _priv.settings.connections,
            nm_remote_connection_get_type,
            .notify_changed_ao = _property_ao_notify_changed_connections_cb,
2025-05-13 11:43:33 +02:00
            .check_nmobj_visible_fcn = (gboolean (*)(GObject *)) nm_remote_connection_get_visible),
        NML_DBUS_META_PROPERTY_INIT_S("Hostname",
                                      PROP_HOSTNAME,
                                      NMClient,
                                      _priv.settings.hostname), ), );
/* D-Bus property metadata for NetworkManager's DnsManager interface,
 * mapped onto the corresponding NMClient properties. */
const NMLDBusMetaIface _nml_dbus_meta_iface_nm_dnsmanager = NML_DBUS_META_IFACE_INIT_PROP(
    NM_DBUS_INTERFACE_DNS_MANAGER,
    nm_client_get_type,
    NML_DBUS_META_INTERFACE_PRIO_NMCLIENT,
    NML_DBUS_META_IFACE_DBUS_PROPERTIES(
        NML_DBUS_META_PROPERTY_INIT_FCN("Configuration",
                                        PROP_DNS_CONFIGURATION,
                                        "aa{sv}",
                                        _notify_update_prop_dns_manager_configuration),
        NML_DBUS_META_PROPERTY_INIT_S("Mode", PROP_DNS_MODE, NMClient, _priv.dns_manager.mode),
        NML_DBUS_META_PROPERTY_INIT_S("RcManager",
                                      PROP_DNS_RC_MANAGER,
                                      NMClient,
                                      _priv.dns_manager.rc_manager), ), );

static void
nm_client_class_init(NMClientClass *client_class)
{
    GObjectClass *object_class = G_OBJECT_CLASS(client_class);
    _dbus_path_nm          = nm_ref_string_new(NM_DBUS_PATH);
    _dbus_path_settings    = nm_ref_string_new(NM_DBUS_PATH_SETTINGS);
    _dbus_path_dns_manager = nm_ref_string_new(NM_DBUS_PATH_DNS_MANAGER);

    object_class->get_property = get_property;
    object_class->set_property = set_property;
    object_class->constructed  = constructed;
    object_class->dispose      = dispose;
    /**
     * NMClient:dbus-connection:
     *
     * The #GDBusConnection to use.
     *
     * If this is not set during object construction, the D-Bus connection will
     * automatically be chosen during async/sync initialization via g_bus_get().
     *
     * Since: 1.22
     */
    obj_properties[PROP_DBUS_CONNECTION] = g_param_spec_object(
        NM_CLIENT_DBUS_CONNECTION,
        "",
        "",
        G_TYPE_DBUS_CONNECTION,
        G_PARAM_READABLE | G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS);
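As a minimal sketch of what this construct-only property enables: a caller can pass its own connection when creating the client, otherwise NMClient falls back to g_bus_get() as described above. Here `my_connection` is a hypothetical, already-created #GDBusConnection, and error handling is elided.
```
/* Sketch only: construct an NMClient on an explicit GDBusConnection.
 * "my_connection" is assumed to exist already. */
NMClient *client;
GError   *error = NULL;

client = g_initable_new(NM_TYPE_CLIENT,
                        NULL, /* GCancellable */
                        &error,
                        NM_CLIENT_DBUS_CONNECTION, my_connection,
                        NULL);
```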
    /**
     * NMClient:instance-flags:
     *
     * #NMClientInstanceFlags for the instance. These affect the behavior of #NMClient.
     * This is a construct property, and most flags may only be set during
     * construction.
     *
     * The flag %NM_CLIENT_INSTANCE_FLAGS_NO_AUTO_FETCH_PERMISSIONS can be toggled at any time,
     * even after constructing the instance. Note that you may want to watch the
     * NMClient:permissions-state property to know whether permissions are ready. Note that
     * permissions are only fetched when NMClient has a D-Bus name owner.
     *
     * The flags %NM_CLIENT_INSTANCE_FLAGS_INITIALIZED_GOOD and %NM_CLIENT_INSTANCE_FLAGS_INITIALIZED_BAD
     * cannot be set; however, they will be returned by the getter after initialization completes.
     *
     * Since: 1.24
     */
    obj_properties[PROP_INSTANCE_FLAGS] = g_param_spec_uint(
        NM_CLIENT_INSTANCE_FLAGS,
        "",
        "",
        0,
        G_MAXUINT32,
        0,
        G_PARAM_READABLE | G_PARAM_WRITABLE | G_PARAM_CONSTRUCT | G_PARAM_STATIC_STRINGS);
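Since the permissions flag is the one flag documented as toggleable after construction, a hedged sketch of flipping it at runtime could look like this (`client` is assumed to be an already-initialized NMClient):
```
/* Sketch: toggle the only runtime-changeable instance flag after
 * construction; "client" is an assumed, initialized NMClient. */
g_object_set(client,
             NM_CLIENT_INSTANCE_FLAGS,
             (guint) NM_CLIENT_INSTANCE_FLAGS_NO_AUTO_FETCH_PERMISSIONS,
             NULL);
```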
    /**
     * NMClient:dbus-name-owner:
     *
     * The name owner of the NetworkManager D-Bus service.
     *
     * Since: 1.22
     **/
    obj_properties[PROP_DBUS_NAME_OWNER] =
        g_param_spec_string(NM_CLIENT_DBUS_NAME_OWNER,
                            "",
                            "",
                            NULL,
                            G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
    /**
     * NMClient:version:
     *
     * The NetworkManager version.
     **/
    obj_properties[PROP_VERSION] = g_param_spec_string(NM_CLIENT_VERSION,
                                                       "",
                                                       "",
                                                       NULL,
                                                       G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
core: add "VersionInfo" property on D-Bus and NMClient
This exposes NM_VERSION as number (contrary to the "Version", which is a
string). That is in particular useful, because the number can be
compared with <> due to the encoding of the version.
While at it, don't make it a single number. Expose an array of numbers,
where the following numbers are a bitfield of capabilities.
Note that before commit 3c67a1ec5e8e ('cli: remove version check against
NM'), we used to parse the "Version" string to detect the version. As
such, the information that "VersionInfo" exposes now, was already
(somewhat) available, you just had to parse the string. The main benefit of
"VersionInfo" is that it can expose capabilities (patched behavior) in
in a lightweight bitfield. To include the numerical version there is
just useful on top.
Currently no additional capabilities are exposed. The idea is of course
to have a place in the future, where we can expose additional
capabilities. Adding a capability flag is most useful for behavior that we
backport to older branches. Otherwise, we could just check the daemon version
alone. But since we only add "VersionInfo" property only now, we cannot backport
any capability further than this, because the "VersionInfo" property itself
won't be backported. As such, this will only be useful in the future by having
a place where we can add (and backport) capabilities.
Note that there is some overlap with the existing "Capability" property
and NMCapability enum. The difference is that adding a capability via "VersionInfo"
is only one bit, and thus cheaper. Most importantly, having it cheaper means
the downsides of adding a capability flag is significantly removed. In
practice, we could live without capabilities for a long time, so they
must be very cheap for them to be worth to add. Another difference might be,
that we will want that the VersionInfo is about compile time defaults (e.g.
a certain patch/behavior that is in or not), while NM_CAPABILITY_TEAM depends on
whether the team plugin is loaded at runtime.
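To make the encoding concrete, here is a small sketch of unpacking such a packed version number. The helper is made up for illustration and is not libnm API; only the "(major << 16 | minor << 8 | micro)" encoding comes from the description above.
```
/* Hypothetical helper (not libnm API): unpack a version encoded
 * as (major << 16 | minor << 8 | micro). */
static void
decode_nm_version(guint32 v, guint *major, guint *minor, guint *micro)
{
    *major = v >> 16;
    *minor = (v >> 8) & 0xFF;
    *micro = v & 0xFF;
}
```
Because of this encoding, two packed versions compare correctly with plain integer `<`/`>`, which is exactly the point made above.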
    /**
     * NMClient:version-info: (type GArray(guint32))
     *
     * Expose version info and capabilities of NetworkManager. If non-empty,
     * the first element is NM_VERSION, which encodes the version of the
     * daemon as "(major << 16 | minor << 8 | micro)". The following elements
     * are bitfields of %NMVersionInfoCapability. If a bit is set, then
core: add "VersionInfo" property on D-Bus and NMClient
This exposes NM_VERSION as number (contrary to the "Version", which is a
string). That is in particular useful, because the number can be
compared with <> due to the encoding of the version.
While at it, don't make it a single number. Expose an array of numbers,
where the following numbers are a bitfield of capabilities.
Note that before commit 3c67a1ec5e8e ('cli: remove version check against
NM'), we used to parse the "Version" string to detect the version. As
such, the information that "VersionInfo" exposes now, was already
(somewhat) available, you just had to parse the string. The main benefit of
"VersionInfo" is that it can expose capabilities (patched behavior) in
in a lightweight bitfield. To include the numerical version there is
just useful on top.
Currently no additional capabilities are exposed. The idea is of course
to have a place in the future, where we can expose additional
capabilities. Adding a capability flag is most useful for behavior that we
backport to older branches. Otherwise, we could just check the daemon version
alone. But since we only add "VersionInfo" property only now, we cannot backport
any capability further than this, because the "VersionInfo" property itself
won't be backported. As such, this will only be useful in the future by having
a place where we can add (and backport) capabilities.
Note that there is some overlap with the existing "Capability" property
and NMCapability enum. The difference is that adding a capability via "VersionInfo"
is only one bit, and thus cheaper. Most importantly, having it cheaper means
the downsides of adding a capability flag is significantly removed. In
practice, we could live without capabilities for a long time, so they
must be very cheap for them to be worth to add. Another difference might be,
that we will want that the VersionInfo is about compile time defaults (e.g.
a certain patch/behavior that is in or not), while NM_CAPABILITY_TEAM depends on
whether the team plugin is loaded at runtime.
2022-12-13 12:25:04 +01:00
     * the running NetworkManager has the respective capability.
     *
     * Since: 1.42
     */
    obj_properties[PROP_VERSION_INFO] =
        g_param_spec_boxed(NM_CLIENT_VERSION_INFO,
                           "",
                           "",
                           G_TYPE_ARRAY,
                           G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
    /**
     * NMClient:state:
     *
     * The current daemon state.
     **/
    obj_properties[PROP_STATE] = g_param_spec_enum(NM_CLIENT_STATE,
                                                   "",
                                                   "",
                                                   NM_TYPE_STATE,
                                                   NM_STATE_UNKNOWN,
                                                   G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
    /**
     * NMClient:startup:
     *
     * Whether the daemon is still starting up.
     **/
    obj_properties[PROP_STARTUP] = g_param_spec_boolean(NM_CLIENT_STARTUP,
                                                        "",
                                                        "",
                                                        FALSE,
                                                        G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
    /**
     * NMClient:nm-running:
     *
     * Whether the daemon is running.
     **/
    obj_properties[PROP_NM_RUNNING] =
        g_param_spec_boolean(NM_CLIENT_NM_RUNNING,
                             "",
                             "",
                             FALSE,
                             G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
    /**
     * NMClient:networking-enabled:
     *
     * Whether networking is enabled.
libnm: deprecate synchronous/blocking API in libnm
Note that D-Bus is fundamentally asynchronous. Doing blocking calls
on top of D-Bus is odd, especially for libnm's NMClient. That is because
NMClient essentially is a client-side cache of the objects from the D-Bus
interface. This cache should be filled exclusively by (asynchronous) D-Bus
events (PropertiesChanged). So, making a blocking D-Bus call means to wait
for a response and return it, while queuing all messages that are received
in the meantime.
Basically there are three ways how a synchronous API on NMClient could behave:
1) the call just calls g_dbus_connection_call_sync(). This means
that libnm sends a D-Bus request via GDBusConnection, and blockingly
waits for the response. All D-Bus messages that get received in the
meantime are queued in the GMainContext that belongs to NMClient.
That means, none of these D-Bus events are processed until we
iterate the GMainContext after the call returns. The effect is
that NMClient (and all cached objects in there) are unaffected by
the D-Bus request.
Most of the synchronous API calls in libnm are of this kind.
The problem is that the strict ordering of D-Bus events gets
violated.
For some API this is not an immediate problem. Take for example
nm_device_wifi_request_scan(). The call merely blockingly tells
NetworkManager to start scanning, but since NetworkManager's D-Bus
API does not directly expose any state that tells whether we are
currently scanning, this out-of-order processing of the D-Bus
request is a small issue.
The problem is more obvious for nm_client_networking_set_enabled().
After calling it, NM_CLIENT_NETWORKING_ENABLED is still unaffected
and unchanged, because the PropertiesChanged signal from D-Bus
is not yet processed.
This means, while you make such a blocking call, NMClient's state
does not change. But usually you perform the synchronous call
to change some state. In this form, the blocking call is not useful,
because NMClient only changes the state after iterating the GMainContext,
and not after the blocking call returns.
2) like 1), but after making the blocking g_dbus_connection_call_sync(),
update the NMClient cache artificially. This is what
nm_manager_check_connectivity() does, to "fix" bgo#784629.
This also has the problem of out-of-order events, but it kinda
solves the problem of not changing the state during the blocking
call. But it does so by hacking the state of the cache. I think
this is really wrong because the state should only be updated from
the ordered stream of D-Bus messages (PropertiesChanged signal and
similar). When libnm decides to modify the state, there may already be
D-Bus messages queued that affect this very state.
3) instead of calling g_dbus_connection_call_sync(), use the
asynchronous g_dbus_connection_call(). If we used a separate
GMainContext for all D-Bus related calls, we could ensure that
while we block for the response, we iterate that internal main context.
This might be nice, because all events are processed in order and
after the blocking call returns, the NMClient state is up to date.
There are problems, however: current blocking API does not do this,
so it's a significant change in behavior. Also, it might be
unexpected to the user that during the blocking call the entire
content of NMClient's cache might change and all pointers to the
cache might be invalidated. Also, of course NMClient would invoke
signals for all the changes that happen.
Another problem is that this would be more effort to implement
and it involves a small performance overhead for all D-Bus related
calls (because we have to serialize all events in an internal
GMainContext first and then invoke them on the caller's context).
Also, if users want this behavior, they can implement it themselves
by running libnm in their own GMainContext. Note that libnm might
have bugs that prevent that from really working, but those should be fixed
instead of adding such synchronous API behavior.
Read also [1] for why blocking calls are wrong.
[1] https://smcv.pseudorandom.co.uk/2008/11/nonblocking/
So, all possible behaviors for synchronous API have severe behavioural
issues. Mark all this API as deprecated. Also, this serves the purpose of
identifying blocking D-Bus calls in libnm.
Note that "deprecated" here does not really mean that the API is going
to be removed. We don't break API. The user may:
- continue to use this API. It's deprecated, awkward and discouraged,
but if it works, by all means use it.
- use asynchronous API. That's the only sensible way to use D-Bus.
If libnm lacks a certain asynchronous counterpart, it should be
added.
- use GDBusConnection directly. There really isn't anything wrong
with D-Bus or GDBusConnection. This deprecated API is just a wrapper
around g_dbus_connection_call_sync(). You may call it directly
without feeling dirty.
---
The only other remaining API is the synchronous GInitable call for
NMClient. That is an entirely separate beast and not particularly
wrong (from an API point of view).
Note that synchronous API in NMSecretAgentOld, NMVpnPluginOld and
NMVpnServicePlugin is not deprecated here. These types are not part
of the D-Bus cache and while they have similar issues, it's less severe
because they have less state.
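As an illustration of the last bullet, here is a minimal sketch (names such as on_set_done() and set_wireless_enabled_async() are made up for this example) that sets the "WirelessEnabled" property asynchronously via GDBusConnection. The cached NMClient state then updates in order, once the resulting PropertiesChanged signal is processed.
```
/* Sketch: asynchronous alternative to the deprecated synchronous setter,
 * going through GDBusConnection directly. */
static void
on_set_done(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError   *error = NULL;
    GVariant *ret;

    ret = g_dbus_connection_call_finish(G_DBUS_CONNECTION(source), result, &error);
    if (!ret) {
        g_warning("Set failed: %s", error->message);
        g_error_free(error);
        return;
    }
    g_variant_unref(ret);
    /* NMClient's cached NM_CLIENT_WIRELESS_ENABLED updates only after the
     * resulting PropertiesChanged signal is processed, in order. */
}

static void
set_wireless_enabled_async(GDBusConnection *dbus_connection, gboolean enabled)
{
    g_dbus_connection_call(dbus_connection,
                           "org.freedesktop.NetworkManager",
                           "/org/freedesktop/NetworkManager",
                           "org.freedesktop.DBus.Properties",
                           "Set",
                           g_variant_new("(ssv)",
                                         "org.freedesktop.NetworkManager",
                                         "WirelessEnabled",
                                         g_variant_new_boolean(enabled)),
                           NULL,
                           G_DBUS_CALL_FLAGS_NONE,
                           -1,
                           NULL,
                           on_set_done,
                           NULL);
}
```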
     *
     * The property setter is a synchronous D-Bus call. This is deprecated since 1.22.
     */
    obj_properties[PROP_NETWORKING_ENABLED] =
        g_param_spec_boolean(NM_CLIENT_NETWORKING_ENABLED,
                             "",
                             "",
                             FALSE,
                             G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS);
    /**
     * NMClient:wireless-enabled:
     *
     * Whether wireless is enabled.
     *
     * The property setter is a synchronous D-Bus call. This is deprecated since 1.22.
     **/
    obj_properties[PROP_WIRELESS_ENABLED] =
        g_param_spec_boolean(NM_CLIENT_WIRELESS_ENABLED,
                             "",
                             "",
                             FALSE,
                             G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS);
    /**
     * NMClient:wireless-hardware-enabled:
     *
     * Whether the wireless hardware is enabled.
     **/
    obj_properties[PROP_WIRELESS_HARDWARE_ENABLED] =
        g_param_spec_boolean(NM_CLIENT_WIRELESS_HARDWARE_ENABLED,
                             "",
                             "",
                             FALSE,
                             G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
    /**
     * NMClient:wwan-enabled:
     *
     * Whether WWAN functionality is enabled.
|
|
|
*
|
|
|
|
|
* The property setter is a synchronous D-Bus call. This is deprecated since 1.22.
|
|
|
|
|
*/
|
2019-10-10 17:57:30 +02:00
|
|
|
obj_properties[PROP_WWAN_ENABLED] =
|
|
|
|
|
g_param_spec_boolean(NM_CLIENT_WWAN_ENABLED,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
FALSE,
|
|
|
|
|
G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2014-07-24 08:53:33 -04:00
|
|
|
/**
|
|
|
|
|
* NMClient:wwan-hardware-enabled:
|
|
|
|
|
*
|
|
|
|
|
* Whether the WWAN hardware is enabled.
|
|
|
|
|
**/
|
2019-10-10 17:57:30 +02:00
|
|
|
obj_properties[PROP_WWAN_HARDWARE_ENABLED] =
|
|
|
|
|
g_param_spec_boolean(NM_CLIENT_WWAN_HARDWARE_ENABLED,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
FALSE,
|
|
|
|
|
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2014-07-24 08:53:33 -04:00
|
|
|
/**
|
|
|
|
|
* NMClient:wimax-enabled:
|
|
|
|
|
*
|
|
|
|
|
* Whether WiMAX functionality is enabled.
|
libnm: deprecate synchronous/blocking API in libnm
2019-09-04 13:58:43 +02:00
|
|
|
*
|
2019-10-29 20:05:02 +01:00
|
|
|
* Deprecated: 1.22: WiMAX is no longer supported and this always returns FALSE. The setter has no effect.
|
libnm: deprecate synchronous/blocking API in libnm
2019-09-04 13:58:43 +02:00
|
|
|
*/
|
2019-10-10 17:57:30 +02:00
|
|
|
obj_properties[PROP_WIMAX_ENABLED] =
|
|
|
|
|
g_param_spec_boolean(NM_CLIENT_WIMAX_ENABLED,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
FALSE,
|
|
|
|
|
G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2014-07-24 08:53:33 -04:00
|
|
|
/**
|
|
|
|
|
* NMClient:wimax-hardware-enabled:
|
|
|
|
|
*
|
|
|
|
|
* Whether the WiMAX hardware is enabled.
|
2019-10-29 20:05:02 +01:00
|
|
|
*
|
|
|
|
|
* Deprecated: 1.22: WiMAX is no longer supported and this always returns FALSE.
|
2014-07-24 08:53:33 -04:00
|
|
|
**/
|
2019-10-10 17:57:30 +02:00
|
|
|
obj_properties[PROP_WIMAX_HARDWARE_ENABLED] =
|
|
|
|
|
g_param_spec_boolean(NM_CLIENT_WIMAX_HARDWARE_ENABLED,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
FALSE,
|
|
|
|
|
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2022-03-21 10:20:11 +01:00
|
|
|
/**
|
|
|
|
|
* NMClient:radio-flags:
|
|
|
|
|
*
|
|
|
|
|
* Flags for radio interfaces. See #NMRadioFlags.
|
|
|
|
|
*
|
|
|
|
|
* Since: 1.38
|
|
|
|
|
**/
|
|
|
|
|
obj_properties[PROP_RADIO_FLAGS] = g_param_spec_uint(NM_CLIENT_RADIO_FLAGS,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
0,
|
|
|
|
|
G_MAXUINT32,
|
|
|
|
|
NM_RADIO_FLAG_NONE,
|
|
|
|
|
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
|
|
|
|
|
|
2014-07-24 08:53:33 -04:00
|
|
|
/**
|
2018-03-24 15:18:21 +00:00
|
|
|
* NMClient:active-connections: (type GPtrArray(NMActiveConnection))
|
2014-07-24 08:53:33 -04:00
|
|
|
*
|
|
|
|
|
* The active connections.
|
|
|
|
|
**/
|
2019-10-10 17:57:30 +02:00
|
|
|
obj_properties[PROP_ACTIVE_CONNECTIONS] =
|
|
|
|
|
g_param_spec_boxed(NM_CLIENT_ACTIVE_CONNECTIONS,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
G_TYPE_PTR_ARRAY,
|
|
|
|
|
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
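A hedged sketch (not part of the original source) of consuming this boxed GPtrArray property through the accessor; `client` is an assumed, already-initialized NMClient.
```
const GPtrArray *acons = nm_client_get_active_connections (client);
guint            i;

for (i = 0; i < acons->len; i++) {
    NMActiveConnection *ac = g_ptr_array_index (acons, i);

    /* The array is owned by NMClient's cache; do not modify or free it. */
    g_print ("active: %s (%s)\n",
             nm_active_connection_get_id (ac),
             nm_active_connection_get_state (ac) == NM_ACTIVE_CONNECTION_STATE_ACTIVATED
                 ? "activated" : "not fully activated");
}
```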
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2014-07-24 08:53:33 -04:00
|
|
|
/**
|
|
|
|
|
* NMClient:connectivity:
|
|
|
|
|
*
|
|
|
|
|
* The network connectivity state.
|
|
|
|
|
*/
|
2019-10-10 17:57:30 +02:00
|
|
|
obj_properties[PROP_CONNECTIVITY] =
|
|
|
|
|
g_param_spec_enum(NM_CLIENT_CONNECTIVITY,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
NM_TYPE_CONNECTIVITY_STATE,
|
|
|
|
|
NM_CONNECTIVITY_UNKNOWN,
|
|
|
|
|
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
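As a hedged illustration (not from the original source): the cached state is free to read, while a fresh probe should go through the asynchronous check rather than the deprecated blocking nm_client_check_connectivity(). An existing `NMClient *client` is assumed.
```
static void
connectivity_checked (GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError             *error = NULL;
    NMConnectivityState state;

    state = nm_client_check_connectivity_finish (NM_CLIENT (source), result, &error);
    if (error) {
        g_warning ("connectivity check failed: %s", error->message);
        g_clear_error (&error);
        return;
    }
    g_print ("probed connectivity state: %d\n", (int) state);
}

/* Cached value from the property, then an asynchronous re-check. */
g_print ("cached connectivity state: %d\n", (int) nm_client_get_connectivity (client));
nm_client_check_connectivity_async (client, NULL, connectivity_checked, NULL);
```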
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2017-08-09 15:21:28 +08:00
|
|
|
/**
|
|
|
|
|
* NMClient::connectivity-check-available
|
|
|
|
|
*
|
|
|
|
|
* Whether a connectivity checking service has been configured.
|
2017-08-17 23:08:44 +02:00
|
|
|
*
|
|
|
|
|
* Since: 1.10
|
2017-08-09 15:21:28 +08:00
|
|
|
*/
|
2019-10-10 17:57:30 +02:00
|
|
|
obj_properties[PROP_CONNECTIVITY_CHECK_AVAILABLE] =
|
|
|
|
|
g_param_spec_boolean(NM_CLIENT_CONNECTIVITY_CHECK_AVAILABLE,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
FALSE,
|
|
|
|
|
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2017-08-09 15:21:28 +08:00
|
|
|
/**
|
|
|
|
|
* NMClient::connectivity-check-enabled
|
|
|
|
|
*
|
|
|
|
|
* Whether a connectivity checking service has been enabled.
|
2017-08-17 23:08:44 +02:00
|
|
|
*
|
libnm: deprecate synchronous/blocking API in libnm
2019-09-04 13:58:43 +02:00
|
|
|
* The property setter is a synchronous D-Bus call. This is deprecated since 1.22.
|
2022-05-05 12:04:36 +02:00
|
|
|
*
|
|
|
|
|
* Since: 1.10
|
2017-08-09 15:21:28 +08:00
|
|
|
*/
|
2019-10-10 17:57:30 +02:00
|
|
|
obj_properties[PROP_CONNECTIVITY_CHECK_ENABLED] =
|
|
|
|
|
g_param_spec_boolean(NM_CLIENT_CONNECTIVITY_CHECK_ENABLED,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
FALSE,
|
|
|
|
|
G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS);
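A hedged sketch (not part of the original source) that avoids the deprecated synchronous property setter: flip the manager's ConnectivityCheckEnabled D-Bus property asynchronously and let the ordered PropertiesChanged event update the cache. `client` is an assumed, already-initialized NMClient.
```
if (nm_client_connectivity_check_get_available (client)
    && !nm_client_connectivity_check_get_enabled (client)) {
    /* Asynchronous alternative to the deprecated blocking setter. */
    g_dbus_connection_call (nm_client_get_dbus_connection (client),
                            "org.freedesktop.NetworkManager",
                            "/org/freedesktop/NetworkManager",
                            "org.freedesktop.DBus.Properties",
                            "Set",
                            g_variant_new ("(ssv)",
                                           "org.freedesktop.NetworkManager",
                                           "ConnectivityCheckEnabled",
                                           g_variant_new_boolean (TRUE)),
                            NULL, G_DBUS_CALL_FLAGS_NONE, -1,
                            NULL, NULL, NULL);
}
```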
|
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
2019-10-30 11:42:58 +01:00
|
|
|
/**
|
|
|
|
|
* NMClient:connectivity-check-uri:
|
|
|
|
|
*
|
|
|
|
|
* The URI used for connectivity checking.
|
|
|
|
|
*
|
|
|
|
|
* Since: 1.22
|
|
|
|
|
**/
|
|
|
|
|
obj_properties[PROP_CONNECTIVITY_CHECK_URI] =
|
|
|
|
|
g_param_spec_string(NM_CLIENT_CONNECTIVITY_CHECK_URI,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
NULL,
|
|
|
|
|
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2014-07-24 08:53:33 -04:00
|
|
|
/**
|
|
|
|
|
* NMClient:primary-connection:
|
|
|
|
|
*
|
|
|
|
|
* The #NMActiveConnection of the device with the default route;
|
|
|
|
|
* see nm_client_get_primary_connection() for more details.
|
|
|
|
|
**/
|
2019-10-10 17:57:30 +02:00
|
|
|
obj_properties[PROP_PRIMARY_CONNECTION] =
|
|
|
|
|
g_param_spec_object(NM_CLIENT_PRIMARY_CONNECTION,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
NM_TYPE_ACTIVE_CONNECTION,
|
|
|
|
|
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
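A short hedged example (not from the original source), assuming an initialized `NMClient *client`:
```
NMActiveConnection *primary = nm_client_get_primary_connection (client);

if (primary)
    g_print ("default route via: %s\n", nm_active_connection_get_id (primary));
else
    g_print ("no primary connection\n");
```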
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2014-07-24 08:53:33 -04:00
|
|
|
/**
|
|
|
|
|
* NMClient:activating-connection:
|
|
|
|
|
*
|
|
|
|
|
* The #NMActiveConnection of the activating connection that is
|
|
|
|
|
* likely to become the new #NMClient:primary-connection.
|
|
|
|
|
**/
|
2019-10-10 17:57:30 +02:00
|
|
|
obj_properties[PROP_ACTIVATING_CONNECTION] =
|
|
|
|
|
g_param_spec_object(NM_CLIENT_ACTIVATING_CONNECTION,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
NM_TYPE_ACTIVE_CONNECTION,
|
|
|
|
|
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2014-07-24 08:53:33 -04:00
|
|
|
/**
|
2018-03-24 15:18:21 +00:00
|
|
|
* NMClient:devices: (type GPtrArray(NMDevice))
|
2014-07-24 08:53:33 -04:00
|
|
|
*
|
2014-10-10 14:28:49 -05:00
|
|
|
* List of real network devices. Does not include placeholder devices.
|
2014-07-24 08:53:33 -04:00
|
|
|
**/
|
2019-10-10 17:57:30 +02:00
|
|
|
obj_properties[PROP_DEVICES] = g_param_spec_boxed(NM_CLIENT_DEVICES,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
G_TYPE_PTR_ARRAY,
|
|
|
|
|
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
|
2014-07-24 08:53:33 -04:00
|
|
|
|
2014-10-10 14:28:49 -05:00
|
|
|
/**
|
2018-03-24 15:18:21 +00:00
|
|
|
* NMClient:all-devices: (type GPtrArray(NMDevice))
|
2014-10-10 14:28:49 -05:00
|
|
|
*
|
|
|
|
|
* List of both real devices and device placeholders.
|
|
|
|
|
* Since: 1.2
|
|
|
|
|
**/
|
2019-10-10 17:57:30 +02:00
|
|
|
obj_properties[PROP_ALL_DEVICES] =
|
|
|
|
|
g_param_spec_boxed(NM_CLIENT_ALL_DEVICES,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
G_TYPE_PTR_ARRAY,
|
|
|
|
|
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
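A hedged sketch (not part of the original source) contrasting the two device lists; `client` is an assumed NMClient instance.
```
const GPtrArray *all = nm_client_get_all_devices (client);
guint            i;

for (i = 0; i < all->len; i++) {
    NMDevice *device = g_ptr_array_index (all, i);

    /* nm_client_get_devices() would skip the placeholders for which
     * nm_device_is_real() returns FALSE. */
    g_print ("%s (%s)\n",
             nm_device_get_iface (device),
             nm_device_is_real (device) ? "real" : "placeholder");
}
```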
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2014-09-29 10:58:16 -04:00
|
|
|
/**
|
2018-03-24 15:18:21 +00:00
|
|
|
* NMClient:connections: (type GPtrArray(NMRemoteConnection))
|
2014-09-29 10:58:16 -04:00
|
|
|
*
|
|
|
|
|
* The list of configured connections that are available to the user. (Note
|
|
|
|
|
* that this differs from the underlying D-Bus property, which may also
|
|
|
|
|
* contain the object paths of connections that the user does not have
|
|
|
|
|
* permission to read the details of.)
|
|
|
|
|
*/
|
2019-10-10 17:57:30 +02:00
|
|
|
obj_properties[PROP_CONNECTIONS] =
|
|
|
|
|
g_param_spec_boxed(NM_CLIENT_CONNECTIONS,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
G_TYPE_PTR_ARRAY,
|
|
|
|
|
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
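A hedged usage sketch (not from the original source), assuming an initialized `NMClient *client`:
```
const GPtrArray *connections = nm_client_get_connections (client);
guint            i;

for (i = 0; i < connections->len; i++) {
    NMRemoteConnection *remote = g_ptr_array_index (connections, i);

    /* Profiles the user may not read are already filtered out here,
     * unlike in the raw D-Bus property. */
    g_print ("profile: %s\n", nm_connection_get_id (NM_CONNECTION (remote)));
}
```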
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2014-09-29 10:58:16 -04:00
|
|
|
/**
|
|
|
|
|
* NMClient:hostname:
|
|
|
|
|
*
|
|
|
|
|
* The machine hostname stored in persistent configuration. This can be
|
|
|
|
|
* modified by calling nm_client_save_hostname().
|
|
|
|
|
*/
|
2019-10-10 17:57:30 +02:00
|
|
|
obj_properties[PROP_HOSTNAME] = g_param_spec_string(NM_CLIENT_HOSTNAME,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
NULL,
|
|
|
|
|
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
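A hedged sketch (not part of the original source) using the asynchronous save variant instead of the blocking nm_client_save_hostname(); `client` and the hostname string are assumptions of the example.
```
static void
hostname_saved (GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError *error = NULL;

    if (!nm_client_save_hostname_finish (NM_CLIENT (source), result, &error)) {
        g_warning ("failed to save hostname: %s", error->message);
        g_clear_error (&error);
    }
}

nm_client_save_hostname_async (client, "new-hostname", NULL, hostname_saved, NULL);
```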
|
2014-09-29 10:58:16 -04:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* NMClient:can-modify:
|
|
|
|
|
*
|
|
|
|
|
* If %TRUE, adding and modifying connections is supported.
|
|
|
|
|
*/
|
2019-10-10 17:57:30 +02:00
|
|
|
obj_properties[PROP_CAN_MODIFY] =
|
|
|
|
|
g_param_spec_boolean(NM_CLIENT_CAN_MODIFY,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
FALSE,
|
|
|
|
|
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2015-06-03 09:15:24 +02:00
|
|
|
/**
|
|
|
|
|
* NMClient:metered:
|
|
|
|
|
*
|
|
|
|
|
* Whether the connectivity is metered.
|
|
|
|
|
*
|
|
|
|
|
* Since: 1.2
|
|
|
|
|
**/
|
2019-10-10 17:57:30 +02:00
|
|
|
obj_properties[PROP_METERED] = g_param_spec_uint(NM_CLIENT_METERED,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
0,
|
|
|
|
|
G_MAXUINT32,
|
|
|
|
|
NM_METERED_UNKNOWN,
|
|
|
|
|
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2016-10-25 11:11:12 +02:00
|
|
|
/**
|
|
|
|
|
* NMClient:dns-mode:
|
|
|
|
|
*
|
|
|
|
|
* The current DNS processing mode.
|
|
|
|
|
*
|
|
|
|
|
* Since: 1.6
|
|
|
|
|
**/
|
2019-10-10 17:57:30 +02:00
|
|
|
obj_properties[PROP_DNS_MODE] = g_param_spec_string(NM_CLIENT_DNS_MODE,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
2019-10-21 18:34:09 +02:00
|
|
|
NULL,
|
2019-10-10 17:57:30 +02:00
|
|
|
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
|
2016-10-25 11:11:12 +02:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* NMClient:dns-rc-manager:
|
|
|
|
|
*
|
|
|
|
|
* The current resolv.conf management mode.
|
|
|
|
|
*
|
|
|
|
|
* Since: 1.6
|
|
|
|
|
**/
|
2019-10-10 17:57:30 +02:00
|
|
|
obj_properties[PROP_DNS_RC_MANAGER] =
|
|
|
|
|
g_param_spec_string(NM_CLIENT_DNS_RC_MANAGER,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
2019-10-21 18:34:09 +02:00
|
|
|
NULL,
|
2019-10-10 17:57:30 +02:00
|
|
|
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2016-10-25 11:11:12 +02:00
|
|
|
/**
|
2019-03-15 09:41:51 +01:00
|
|
|
* NMClient:dns-configuration: (type GPtrArray(NMDnsEntry))
|
2016-10-25 11:11:12 +02:00
|
|
|
*
|
2019-03-15 09:41:51 +01:00
|
|
|
* The current DNS configuration, represented as an array
|
|
|
|
|
* of #NMDnsEntry objects.
|
2016-10-25 11:11:12 +02:00
|
|
|
*
|
|
|
|
|
* Since: 1.6
|
|
|
|
|
**/
|
2019-10-10 17:57:30 +02:00
|
|
|
obj_properties[PROP_DNS_CONFIGURATION] =
|
|
|
|
|
g_param_spec_boxed(NM_CLIENT_DNS_CONFIGURATION,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
G_TYPE_PTR_ARRAY,
|
|
|
|
|
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
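A minimal hedged sketch (not from the original source) of walking the DNS configuration array; `client` is an assumed NMClient instance.
```
const GPtrArray *dns = nm_client_get_dns_configuration (client);
guint            i;

for (i = 0; i < dns->len; i++) {
    NMDnsEntry *entry = g_ptr_array_index (dns, i);

    g_print ("DNS configured on interface: %s\n",
             nm_dns_entry_get_interface (entry));
}
```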
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2017-10-21 16:05:19 +02:00
|
|
|
/**
|
2018-03-24 15:18:21 +00:00
|
|
|
* NMClient:checkpoints: (type GPtrArray(NMCheckpoint))
|
2017-10-21 16:05:19 +02:00
|
|
|
*
|
|
|
|
|
* The list of active checkpoints.
|
|
|
|
|
*
|
|
|
|
|
* Since: 1.12
|
|
|
|
|
*/
|
2019-10-10 17:57:30 +02:00
|
|
|
obj_properties[PROP_CHECKPOINTS] =
|
2019-10-22 14:17:11 +02:00
|
|
|
g_param_spec_boxed(NM_CLIENT_CHECKPOINTS,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
2019-10-10 17:57:30 +02:00
|
|
|
G_TYPE_PTR_ARRAY,
|
|
|
|
|
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-12-20 18:10:55 +01:00
|
|
|
/**
|
|
|
|
|
* NMClient:capabilities: (type GArray(guint32))
|
|
|
|
|
*
|
|
|
|
|
* The list of capability numbers as guint32, or %NULL if
|
2020-07-04 11:37:01 +03:00
|
|
|
* there are no capabilities. The numeric values correspond
|
2019-12-20 18:10:55 +01:00
|
|
|
* to the %NMCapability enum.
|
|
|
|
|
*
|
|
|
|
|
* Since: 1.24
|
|
|
|
|
*/
|
|
|
|
|
obj_properties[PROP_CAPABILITIES] =
|
|
|
|
|
g_param_spec_boxed(NM_CLIENT_CAPABILITIES,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
G_TYPE_ARRAY,
|
|
|
|
|
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
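A hedged sketch (not part of the original source), assuming the nm_client_get_capabilities() accessor that returns the cached array with its length:
```
gsize         length = 0;
const guint32 *caps  = nm_client_get_capabilities (client, &length);
gsize         i;

for (i = 0; i < length; i++)
    g_print ("capability: %u\n", (guint) caps[i]);
```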
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2019-12-05 15:53:51 +01:00
|
|
|
/**
|
|
|
|
|
* NMClient:permissions-state:
|
|
|
|
|
*
|
|
|
|
|
* The state of the cached permissions. The value %NM_TERNARY_DEFAULT
|
|
|
|
|
* means that no permissions have been received yet (or not yet requested).
|
|
|
|
|
* %NM_TERNARY_TRUE means that permissions are received, cached and up
|
|
|
|
|
* to date. %NM_TERNARY_FALSE means that permissions were received and are
|
|
|
|
|
* cached, but in the meantime a "CheckPermissions" signal was received
|
|
|
|
|
* that invalidated the cached permissions.
|
|
|
|
|
* Note that NMClient will always emit a notify::permissions-state signal
|
|
|
|
|
* when a "CheckPermissions" signal got received or after new permissions
|
|
|
|
|
* were received (that is, regardless of whether the value of the permission state
|
|
|
|
|
* actually changed). With this you can watch the permissions-state property
|
|
|
|
|
* to know whether the permissions are ready. Note that while NMClient has
|
|
|
|
|
* no D-Bus name owner, no permissions are fetched (and this property won't
|
|
|
|
|
* change).
|
|
|
|
|
*
|
|
|
|
|
* Since: 1.24
|
|
|
|
|
*/
|
|
|
|
|
obj_properties[PROP_PERMISSIONS_STATE] =
|
|
|
|
|
g_param_spec_enum(NM_CLIENT_PERMISSIONS_STATE,
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
NM_TYPE_TERNARY,
|
|
|
|
|
NM_TERNARY_DEFAULT,
|
|
|
|
|
G_PARAM_READABLE | G_PARAM_STATIC_STRINGS);
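A hedged sketch (not from the original source) of the watch pattern the comment describes; `client` is an assumed, already-initialized NMClient.
```
static void
on_permissions_state (GObject *object, GParamSpec *pspec, gpointer user_data)
{
    NMClient *client = NM_CLIENT (object);

    if (nm_client_get_permissions_state (client) == NM_TERNARY_TRUE) {
        /* Permissions are fetched and current; individual results can
         * now be read from the cache. */
        g_print ("can modify system connections: %d\n",
                 (int) nm_client_get_permission_result (client,
                     NM_CLIENT_PERMISSION_SETTINGS_MODIFY_SYSTEM));
    }
}

g_signal_connect (client, "notify::" NM_CLIENT_PERMISSIONS_STATE,
                  G_CALLBACK (on_permissions_state), NULL);
```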
|
2020-09-28 16:03:33 +02:00
|
|
|
|
libnm: refactor caching of D-Bus objects in NMClient
No longer use GDBusObjectMangaerClient and gdbus-codegen generated classes
for the NMClient cache. Instead, use GDBusConnection directly and a
custom implementation (NMLDBusObject) for caching D-Bus' ObjectManager
data.
CHANGES
-------
- This is a complete rework. I think the previous implementation was
difficult to understand. There were unfixed bugs and nobody understood
the code well enough to fix them. Maybe somebody out there understood the
code, but I certainly did not. At least nobody provided patches to fix those
issues. I do believe that this implementation is more straightforward and
easier to understand. It removes a lot of layers of code. Whether this claim
of simplicity is true, each reader must decide for himself/herself. Note
that it is still fairly complex.
- There was a lingering performance issue with large number of D-Bus
objects. The patch tries hard that the implementation scales well. Of
course, when we cache N objects that have N-to-M references to other,
we still are fundamentally O(N*M) for runtime and memory consumption (with
M being the number of references between objects). But each part should behave
efficiently and well.
- Play well with GMainContext. libnm code (NMClient) is generally not
thread safe. However, it should work to use multiple instances in
parallel, as long as each access to a NMClient is through the caller's
GMainContext. This follows glib's style and effectively allows to use NMClient
in a multi threaded scenario. This implies to stick to a main context
upon construction and ensure that callbacks are only invoked when
iterating that context. Also, NMClient itself shall never iterate the
caller's context. This also means, libnm must never use g_idle_add() or
g_timeout_add(), as those enqueue sources in the g_main_context_default()
context.
- Get ordering of messages right. All events are consistently enqueued
in a GMainContext and processed strictly in order. For example,
previously "nm-object.c" tried to combine signals and emit them on an
idle handler. That is wrong, signals must be emitted in the right order
and when they happen. Note that when using GInitable's synchronous initialization
to initialize the NMClient instance, NMClient internally still operates fully
asynchronously. In that case NMClient has an internal main context.
- NMClient takes over most of the functionality. When using D-Bus'
ObjectManager interface, one needs to handle basically the entire state
of the D-Bus interface. That cannot be separated well into distinct
parts, and even if you try, you just end up having closely related code
in different source files. Spreading related code does not make it
easier to understand, on the contrary. That means, NMClient is
inherently complex as it contains most of the logic. I think that is
not avoidable, but it's not as bad as it sounds.
- NMClient processes D-Bus messages and state changes in separate steps.
First NMClient unpacks the message (e.g. _dbus_handle_properties_changed()) and
keeps track of the changed data. Then we update the GObject instances
(_dbus_handle_obj_changed_dbus()) without emitting any signals yet. Finally,
we emit all signals and notifications that were collected
(_dbus_handle_changes_commit()). Note that for example during the initial
GetManagedObjects() reply, NMClient receive a large amount of state at once.
But we first apply all the changes to our GObject instances before
emitting any signals. The result is that signals are always emitted in a moment
when the cache is consistent. The unavoidable downside is that when you receive
a property changed signal, possibly many other properties changed
already and more signals are about to be emitted.
- NMDeviceWifi no longer modifies the content of the cache from client side
during poke_wireless_devices_with_rf_status(). The content of the cache
should be determined by D-Bus alone and follow what NetworkManager
service exposes. Local modifications should be avoided.
- This aims to bring no API/ABI change, though it does of course bring
various subtle changes in behavior. Those should be all for the better, but the
goal is not to break any existing clients. This does change internal
(albeit externally visible) API, like dropping NM_OBJECT_DBUS_OBJECT_MANAGER
property and NMObject no longer implementing GInitableIface and GAsyncInitableIface.
- Some uses of gdbus-codegen classes remain in NMVpnPluginOld, NMVpnServicePlugin
and NMSecretAgentOld. These are independent of NMClient/NMObject and
should be reworked separately.
- While we no longer use generated classes from gdbus-codegen, we don't
need more glue code than before. Also before we constructed NMPropertiesInfo and
a had large amount of code to propagate properties from NMDBus* to NMObject.
That got completely reworked, but did not fundamentally change. You still need
about the same effort to create the NMLDBusMetaIface. Not using
generated bindings did not make anything worse (which tells about the
usefulness of generated code, at least in the way it was used).
- NMLDBusMetaIface and other meta data is static and immutable. This
avoids copying them around. Also, macros like NML_DBUS_META_PROPERTY_INIT_U()
have compile time checks to ensure the property types matches. It's pretty hard
to misuse them because it won't compile.
- The meta data now explicitly encodes the expected D-Bus types and
makes sure never to accept wrong data. That would only matter when the
server (accidentally or intentionally) exposes unexpected types on
D-Bus. I don't think that was previously ensured in all cases.
For example, demarshal_generic() only cared about the GObject property
type, it didn't know the expected D-Bus type.
- Previously GDBusObjectManager would sometimes emit warnings (g_log()). Those
probably indicated real bugs. In any case, it prevented us from running CI
with G_DEBUG=fatal-warnings, because there would be just too many
unrelated crashes. Now we log debug messages that can be enabled with
"LIBNM_CLIENT_DEBUG=trace". Some of these messages can also be turned
into g_warning()/g_critical() by setting LIBNM_CLIENT_DEBUG=warning,error.
Together with G_DEBUG=fatal-warnings, this turns them into assertions.
Note that such "assertion failures" might also happen because of a server
bug (or change). Thus these are not common assertions that indicate a bug
in libnm and are thus not armed unless explicitly requested. In our CI we
should now always run with LIBNM_CLIENT_DEBUG=warning,error and
G_DEBUG=fatal-warnings and to catch bugs. Note that currently
NetworkManager has bugs in this regard, so enabling this will result in
assertion failures. That should be fixed first.
- Note that this changes the order in which we emit "notify:devices" and
"device-added" signals. I think it makes the most sense to emit first
"device-removed", then "notify:devices", and finally "device-added"
signals.
This changes behavior for commit 52ae28f6e5bf ('libnm: queue
added/removed signals and suppress uninitialized notifications'),
but I don't think that users should actually rely on the order. Still,
the new order makes the most sense to me.
- In NetworkManager, profiles can be invisible to the user by setting
"connection.permissions". Such profiles would be hidden by NMClient's
nm_client_get_connections() and their "connection-added"/"connection-removed"
signals.
Note that NMActiveConnection's nm_active_connection_get_connection()
and NMDevice's nm_device_get_available_connections() still exposes such
hidden NMRemoteConnection instances. This behavior was preserved.
NUMBERS
-------
I compared 3 versions of libnm.
[1] 962297f9085d, current tip of nm-1-20 branch
[2] 4fad8c7c642e, current master, immediate parent of this patch
[3] this patch
All tests were done on Fedora 31, x86_64, gcc 9.2.1-1.fc31.
The libraries were build with
$ ./contrib/fedora/rpm/build_clean.sh -g -w test -W debug
Note that RPM build already stripped the library.
---
N1) File size of libnm.so.0.1.0 in bytes. There currently seems to be a issue
on Fedora 31 generating wrong ELF notes. Usually, libnm is smaller but
in these tests it had large (and bogus) ELF notes. Anyway, the point
is to show the relative sizes, so it doesn't matter).
[1] 4075552 (102.7%)
[2] 3969624 (100.0%)
[3] 3705208 ( 93.3%)
---
N2) `size /usr/lib64/libnm.so.0.1.0`:
text data bss dec hex filename
[1] 1314569 (102.0%) 69980 ( 94.8%) 10632 ( 80.4%) 1395181 (101.4%) 1549ed /usr/lib64/libnm.so.0.1.0
[2] 1288410 (100.0%) 73796 (100.0%) 13224 (100.0%) 1375430 (100.0%) 14fcc6 /usr/lib64/libnm.so.0.1.0
[3] 1229066 ( 95.4%) 65248 ( 88.4%) 13400 (101.3%) 1307714 ( 95.1%) 13f442 /usr/lib64/libnm.so.0.1.0
---
N3) Performance test with test-client.py. With checkout of [2], run
```
prepare_checkout() {
rm -rf /tmp/nm-test && \
git checkout -B test 4fad8c7c642e && \
git clean -fdx && \
./autogen.sh --prefix=/tmp/nm-test && \
make -j 5 install && \
make -j 5 check-local-clients-tests-test-client
}
prepare_test() {
NM_TEST_REGENERATE=1 NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v
}
do_test() {
for i in {1..10}; do
NM_TEST_CLIENT_BUILDDIR="/data/src/NetworkManager" NM_TEST_CLIENT_NMCLI_PATH=/usr/bin/nmcli python3 ./clients/tests/test-client.py -v || return -1
done
echo "done!"
}
prepare_checkout
prepare_test
time do_test
```
[1] real 2m14.497s (101.3%) user 5m26.651s (100.3%) sys 1m40.453s (101.4%)
[2] real 2m12.800s (100.0%) user 5m25.619s (100.0%) sys 1m39.065s (100.0%)
[3] real 1m54.915s ( 86.5%) user 4m18.585s ( 79.4%) sys 1m32.066s ( 92.9%)
---
N4) Performance. Run NetworkManager from build [2] and set up a large number
of profiles (551 profiles and 515 devices, mostly unrealized). This
setup is already at the edge of what NetworkManager currently can
handle. Of course, that is a different issue. Here we just check how
long plain `nmcli` takes on the system.
```
do_cleanup() {
for UUID in $(nmcli -g NAME,UUID connection show | sed -n 's/^xx-c-.*:\([^:]\+\)$/\1/p'); do
nmcli connection delete uuid "$UUID"
done
for DEVICE in $(nmcli -g DEVICE device status | grep '^xx-i-'); do
nmcli device delete "$DEVICE"
done
}
do_setup() {
do_cleanup
for i in {1..30}; do
nmcli connection add type bond autoconnect no con-name xx-c-bond-$i ifname xx-i-bond-$i ipv4.method disabled ipv6.method ignore
for j in $(seq $i 30); do
nmcli connection add type vlan autoconnect no con-name xx-c-vlan-$i-$j vlan.id $j ifname xx-i-vlan-$i-$j vlan.parent xx-i-bond-$i ipv4.method disabled ipv6.method ignore
done
done
systemctl restart NetworkManager.service
sleep 5
}
do_test() {
perf stat -r 50 -B nmcli 1>/dev/null
}
do_test
```
[1]
Performance counter stats for 'nmcli' (50 runs):
456.33 msec task-clock:u # 1.093 CPUs utilized ( +- 0.44% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,900 page-faults:u # 0.013 M/sec ( +- 0.02% )
1,408,675,453 cycles:u # 3.087 GHz ( +- 0.48% )
1,594,741,060 instructions:u # 1.13 insn per cycle ( +- 0.02% )
368,744,018 branches:u # 808.061 M/sec ( +- 0.02% )
4,566,058 branch-misses:u # 1.24% of all branches ( +- 0.76% )
0.41761 +- 0.00282 seconds time elapsed ( +- 0.68% )
[2]
Performance counter stats for 'nmcli' (50 runs):
477.99 msec task-clock:u # 1.088 CPUs utilized ( +- 0.36% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
5,948 page-faults:u # 0.012 M/sec ( +- 0.03% )
1,471,133,482 cycles:u # 3.078 GHz ( +- 0.36% )
1,655,275,369 instructions:u # 1.13 insn per cycle ( +- 0.02% )
382,595,152 branches:u # 800.433 M/sec ( +- 0.02% )
4,746,070 branch-misses:u # 1.24% of all branches ( +- 0.49% )
0.43923 +- 0.00242 seconds time elapsed ( +- 0.55% )
[3]
Performance counter stats for 'nmcli' (50 runs):
352.36 msec task-clock:u # 1.027 CPUs utilized ( +- 0.32% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
4,790 page-faults:u # 0.014 M/sec ( +- 0.26% )
1,092,341,186 cycles:u # 3.100 GHz ( +- 0.26% )
1,209,045,283 instructions:u # 1.11 insn per cycle ( +- 0.02% )
281,708,462 branches:u # 799.499 M/sec ( +- 0.01% )
3,101,031 branch-misses:u # 1.10% of all branches ( +- 0.61% )
0.34296 +- 0.00120 seconds time elapsed ( +- 0.35% )
---
N5) same setup as N4), but run `PAGER= /bin/time -v nmcli`:
[1]
Command being timed: "nmcli"
User time (seconds): 0.42
System time (seconds): 0.04
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.43
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34456
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6128
Voluntary context switches: 1298
Involuntary context switches: 1106
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[2]
Command being timed: "nmcli"
User time (seconds): 0.44
System time (seconds): 0.04
Percent of CPU this job got: 108%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.44
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 34452
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 6169
Voluntary context switches: 1849
Involuntary context switches: 142
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
[3]
Command being timed: "nmcli"
User time (seconds): 0.32
System time (seconds): 0.02
Percent of CPU this job got: 102%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.34
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 29196
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 5059
Voluntary context switches: 919
Involuntary context switches: 685
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
---
N6) same setup as N4), but run `nmcli monitor` and look at `ps aux` for
the RSS size.
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[1] me 1492900 21.0 0.2 461348 33248 pts/10 Sl+ 15:02 0:00 nmcli monitor
[2] me 1490721 5.0 0.2 461496 33548 pts/10 Sl+ 15:00 0:00 nmcli monitor
[3] me 1495801 16.5 0.1 459476 28692 pts/10 Sl+ 15:04 0:00 nmcli monitor
2019-10-30 11:42:58 +01:00
|
|
|
_nml_dbus_meta_class_init_with_properties(object_class,
|
|
|
|
|
&_nml_dbus_meta_iface_nm,
|
|
|
|
|
&_nml_dbus_meta_iface_nm_settings,
|
|
|
|
|
&_nml_dbus_meta_iface_nm_dnsmanager);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2014-07-24 08:53:33 -04:00
|
|
|
/**
|
|
|
|
|
* NMClient::device-added:
|
|
|
|
|
* @client: the client that received the signal
|
|
|
|
|
* @device: (type NMDevice): the new device
|
|
|
|
|
*
|
2014-10-10 14:28:49 -05:00
|
|
|
* Notifies that a #NMDevice is added. This signal is not emitted for
|
|
|
|
|
* placeholder devices.
|
2014-07-24 08:53:33 -04:00
|
|
|
**/
|
2019-10-10 17:57:30 +02:00
|
|
|
signals[DEVICE_ADDED] = g_signal_new(NM_CLIENT_DEVICE_ADDED,
|
|
|
|
|
G_OBJECT_CLASS_TYPE(object_class),
|
|
|
|
|
G_SIGNAL_RUN_FIRST,
|
libnm: hide NMClient struct from public headers and use direct private field
Having the NMClient/NMClientClass structs in the public header allows
the user to subclass these types. Subclassing this type was never
intended, nor is it supported, nor does it seem useful. Subclassing only
makes sense if the type has suitable hooks to extend the type in a
meaningful way. NMClient hasn't, and everybody trying to derive from
this class would be better off delegating instead.
Also, having these structs in the public header prevents us from
embedding the private data in the object structure itself.
It thus has a runtime overhead and is less convenient for debugging (it's
hard to find the private data pointer in gdb).
Most importantly, there is no easy way to find the offset of the private
data fields, short of calling NM_CLIENT_GET_PRIVATE() -- which currently
is a macro. If we later want to generically look up the offset of the
private data, we would need NM_CLIENT_GET_PRIVATE() as a function.
Instead, by having an internally, statically known offset, we can use
that offset directly.
Also drop all signal hooks. They are also not useful.
This is an ABI and API change, but of something that we never wanted to
be part of the ABI/API, and which hopefully nobody is using.
2019-10-18 07:44:31 +02:00
|
|
|
0,
|
|
|
|
|
NULL,
|
|
|
|
|
NULL,
|
|
|
|
|
NULL,
|
2019-10-10 17:57:30 +02:00
|
|
|
G_TYPE_NONE,
|
|
|
|
|
1,
|
|
|
|
|
G_TYPE_OBJECT);
|
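/* Usage sketch (illustration, not part of this file; the function name is
 * hypothetical): a caller subscribing to the "device-added" signal
 * registered above. */
static void
example_on_device_added(NMClient *client, NMDevice *device, gpointer user_data)
{
    g_print("device added: %s\n", nm_device_get_iface(device));
}

/* ... after creating the client:
 *   g_signal_connect(client, NM_CLIENT_DEVICE_ADDED,
 *                    G_CALLBACK(example_on_device_added), NULL);
 */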
2020-09-28 16:03:33 +02:00
|
|
|
|
2014-07-24 08:53:33 -04:00
|
|
|
/**
|
|
|
|
|
* NMClient::device-removed:
|
|
|
|
|
* @client: the client that received the signal
|
|
|
|
|
* @device: (type NMDevice): the removed device
|
|
|
|
|
*
|
2014-10-10 14:28:49 -05:00
|
|
|
* Notifies that a #NMDevice is removed. This signal is not emitted for
|
|
|
|
|
* placeholder devices.
|
2014-07-24 08:53:33 -04:00
|
|
|
**/
|
2019-10-10 17:57:30 +02:00
|
|
|
signals[DEVICE_REMOVED] = g_signal_new(NM_CLIENT_DEVICE_REMOVED,
|
|
|
|
|
G_OBJECT_CLASS_TYPE(object_class),
|
|
|
|
|
G_SIGNAL_RUN_FIRST,
|
libnm: hide NMClient struct from public headers and use direct private field
Having the NMClient/NMClientClass structs in the public header allows
the user to subclass these types. Subclassing this type was never
intended, nor is it supported, nor does it seem useful. Subclassing only
makes sense if the type has suitable hooks to extend the type in a
meaningful way. NMClient hasn't, and everybody trying to derive from
this class would be better off delegating instead.
Also, having these structs in the public header prevents us from
embedding the private data in the object structure itself.
It thus has a runtime overhead and is less convenient for debugging (it's
hard to find the private data pointer in gdb).
Most importantly, there is no easy way to find the offset of the private
data fields, short of calling NM_CLIENT_GET_PRIVATE() -- which currently
is a macro. If we later want to generically look up the offset of the
private data, we would need NM_CLIENT_GET_PRIVATE() as a function.
Instead, by having an internally, statically known offset, we can use
that offset directly.
Also drop all signal hooks. They are also not useful.
This is an ABI and API change, but of something that we never wanted to
be part of the ABI/API, and which hopefully nobody is using.
2019-10-18 07:44:31 +02:00
|
|
|
0,
|
|
|
|
|
NULL,
|
|
|
|
|
NULL,
|
|
|
|
|
NULL,
|
2019-10-10 17:57:30 +02:00
|
|
|
G_TYPE_NONE,
|
|
|
|
|
1,
|
|
|
|
|
G_TYPE_OBJECT);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2014-10-10 14:28:49 -05:00
|
|
|
/**
|
|
|
|
|
* NMClient::any-device-added:
|
|
|
|
|
* @client: the client that received the signal
|
|
|
|
|
* @device: (type NMDevice): the new device
|
|
|
|
|
*
|
|
|
|
|
* Notifies that a #NMDevice is added. This signal is emitted for both
|
|
|
|
|
* regular devices and placeholder devices.
|
|
|
|
|
**/
|
2019-10-10 17:57:30 +02:00
|
|
|
signals[ANY_DEVICE_ADDED] = g_signal_new(NM_CLIENT_ANY_DEVICE_ADDED,
|
|
|
|
|
G_OBJECT_CLASS_TYPE(object_class),
|
|
|
|
|
G_SIGNAL_RUN_FIRST,
|
libnm: hide NMClient struct from public headers and use direct private field
Having the NMClient/NMClientClass structs in the public header allows
the user to subclass these types. Subclassing this type was never
intended, nor is it supported, nor does it seem useful. Subclassing only
makes sense if the type has suitable hooks to extend the type in a
meaningful way. NMClient hasn't, and everybody trying to derive from
this class would be better off delegating instead.
Also, having these structs in the public header prevents us from
embedding the private data in the object structure itself.
It thus has a runtime overhead and is less convenient for debugging (it's
hard to find the private data pointer in gdb).
Most importantly, there is no easy way to find the offset of the private
data fields, short of calling NM_CLIENT_GET_PRIVATE() -- which currently
is a macro. If we later want to generically look up the offset of the
private data, we would need NM_CLIENT_GET_PRIVATE() as a function.
Instead, by having an internally, statically known offset, we can use
that offset directly.
Also drop all signal hooks. They are also not useful.
This is an ABI and API change, but of something that we never wanted to
be part of the ABI/API, and which hopefully nobody is using.
2019-10-18 07:44:31 +02:00
|
|
|
0,
|
|
|
|
|
NULL,
|
|
|
|
|
NULL,
|
|
|
|
|
NULL,
|
2019-10-10 17:57:30 +02:00
|
|
|
G_TYPE_NONE,
|
|
|
|
|
1,
|
|
|
|
|
G_TYPE_OBJECT);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2014-10-10 14:28:49 -05:00
|
|
|
/**
|
|
|
|
|
* NMClient::any-device-removed:
|
|
|
|
|
* @client: the client that received the signal
|
|
|
|
|
* @device: (type NMDevice): the removed device
|
|
|
|
|
*
|
|
|
|
|
* Notifies that a #NMDevice is removed. This signal is emitted for both
|
|
|
|
|
* regular devices and placeholder devices.
|
|
|
|
|
**/
|
2019-10-10 17:57:30 +02:00
|
|
|
signals[ANY_DEVICE_REMOVED] = g_signal_new(NM_CLIENT_ANY_DEVICE_REMOVED,
|
|
|
|
|
G_OBJECT_CLASS_TYPE(object_class),
|
|
|
|
|
G_SIGNAL_RUN_FIRST,
|
libnm: hide NMClient struct from public headers and use direct private field
Having the NMClient/NMClientClass structs in the public header allows
the user to subclass these types. Subclassing this type was never
intended, nor is it supported, nor does it seem useful. Subclassing only
makes sense if the type has suitable hooks to extend the type in a
meaningful way. NMClient hasn't, and everybody trying to derive from
this class would be better off delegating instead.
Also, having these structs in the public header prevents us from
embedding the private data in the object structure itself.
It thus has a runtime overhead and is less convenient for debugging (it's
hard to find the private data pointer in gdb).
Most importantly, there is no easy way to find the offset of the private
data fields, short of calling NM_CLIENT_GET_PRIVATE() -- which currently
is a macro. If we later want to generically look up the offset of the
private data, we would need NM_CLIENT_GET_PRIVATE() as a function.
Instead, by having an internally, statically known offset, we can use
that offset directly.
Also drop all signal hooks. They are also not useful.
This is an ABI and API change, but of something that we never wanted to
be part of the ABI/API, and which hopefully nobody is using.
2019-10-18 07:44:31 +02:00
|
|
|
0,
|
|
|
|
|
NULL,
|
|
|
|
|
NULL,
|
|
|
|
|
NULL,
|
2019-10-10 17:57:30 +02:00
|
|
|
G_TYPE_NONE,
|
|
|
|
|
1,
|
|
|
|
|
G_TYPE_OBJECT);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2014-07-24 08:53:33 -04:00
|
|
|
/**
|
|
|
|
|
* NMClient::permission-changed:
|
|
|
|
|
* @client: the client that received the signal
|
|
|
|
|
* @permission: a permission from #NMClientPermission
|
|
|
|
|
* @result: the permission's result, one of #NMClientPermissionResult
|
|
|
|
|
*
|
|
|
|
|
* Notifies that a permission has changed.
|
|
|
|
|
**/
|
2019-10-10 17:57:30 +02:00
|
|
|
signals[PERMISSION_CHANGED] = g_signal_new(NM_CLIENT_PERMISSION_CHANGED,
|
|
|
|
|
G_OBJECT_CLASS_TYPE(object_class),
|
|
|
|
|
G_SIGNAL_RUN_FIRST,
|
|
|
|
|
0,
|
|
|
|
|
NULL,
|
|
|
|
|
NULL,
|
|
|
|
|
NULL,
|
|
|
|
|
G_TYPE_NONE,
|
|
|
|
|
2,
|
|
|
|
|
G_TYPE_UINT,
|
|
|
|
|
G_TYPE_UINT);
|
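/* Usage sketch (illustration, not part of this file; the function name is
 * hypothetical): the two guint arguments correspond to NMClientPermission
 * and NMClientPermissionResult. */
static void
example_on_permission_changed(NMClient *client,
                              guint     permission,
                              guint     result,
                              gpointer  user_data)
{
    if (permission == NM_CLIENT_PERMISSION_NETWORK_CONTROL
        && result != NM_CLIENT_PERMISSION_RESULT_YES)
        g_print("no longer permitted to control networking\n");
}

/* g_signal_connect(client, NM_CLIENT_PERMISSION_CHANGED,
 *                  G_CALLBACK(example_on_permission_changed), NULL); */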
2014-09-29 10:58:16 -04:00
|
|
|
/**
|
|
|
|
|
* NMClient::connection-added:
|
2022-05-03 17:31:40 +02:00
|
|
|
* @client: the client that received the signal
|
2014-09-29 10:58:16 -04:00
|
|
|
* @connection: the new connection
|
|
|
|
|
*
|
|
|
|
|
* Notifies that a #NMConnection has been added.
|
|
|
|
|
**/
|
2019-10-10 17:57:30 +02:00
|
|
|
signals[CONNECTION_ADDED] = g_signal_new(NM_CLIENT_CONNECTION_ADDED,
|
|
|
|
|
G_OBJECT_CLASS_TYPE(object_class),
|
|
|
|
|
G_SIGNAL_RUN_FIRST,
|
libnm: hide NMClient struct from public headers and use direct private field
Having the NMClient/NMClientClass structs in the public header allows
the user to subclass these types. Subclassing this type was never
intended, nor is it supported, nor does it seem useful. Subclassing only
makes sense if the type has suitable hooks to extend the type in a
meaningful way. NMClient hasn't, and everybody trying to derive from
this class would be better off delegating instead.
Also, having these structs in the public header prevents us from
embedding the private data in the object structure itself.
It thus has a runtime overhead and is less convenient for debugging (it's
hard to find the private data pointer in gdb).
Most importantly, there is no easy way to find the offset of the private
data fields, short of calling NM_CLIENT_GET_PRIVATE() -- which currently
is a macro. If we later want to generically look up the offset of the
private data, we would need NM_CLIENT_GET_PRIVATE() as a function.
Instead, by having an internally, statically known offset, we can use
that offset directly.
Also drop all signal hooks. They are also not useful.
This is an ABI and API change, but of something that we never wanted to
be part of the ABI/API, and which hopefully nobody is using.
2019-10-18 07:44:31 +02:00
|
|
|
0,
|
|
|
|
|
NULL,
|
|
|
|
|
NULL,
|
|
|
|
|
NULL,
|
2019-10-10 17:57:30 +02:00
|
|
|
G_TYPE_NONE,
|
|
|
|
|
1,
|
|
|
|
|
NM_TYPE_REMOTE_CONNECTION);
|
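/* Usage sketch (illustration, not part of this file; the function name is
 * hypothetical): NMRemoteConnection implements NMConnection, so the
 * generic NMConnection getters apply. */
static void
example_on_connection_added(NMClient           *client,
                            NMRemoteConnection *connection,
                            gpointer            user_data)
{
    g_print("profile added: %s\n", nm_connection_get_id(NM_CONNECTION(connection)));
}

/* g_signal_connect(client, NM_CLIENT_CONNECTION_ADDED,
 *                  G_CALLBACK(example_on_connection_added), NULL); */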
2020-09-28 16:03:33 +02:00
|
|
|
|
2014-09-29 10:58:16 -04:00
|
|
|
/**
|
|
|
|
|
* NMClient::connection-removed:
|
2022-05-03 17:31:40 +02:00
|
|
|
* @client: the client that received the signal
|
2014-09-29 10:58:16 -04:00
|
|
|
* @connection: the removed connection
|
|
|
|
|
*
|
|
|
|
|
* Notifies that a #NMConnection has been removed.
|
|
|
|
|
**/
|
2019-10-10 17:57:30 +02:00
|
|
|
signals[CONNECTION_REMOVED] = g_signal_new(NM_CLIENT_CONNECTION_REMOVED,
|
|
|
|
|
G_OBJECT_CLASS_TYPE(object_class),
|
|
|
|
|
G_SIGNAL_RUN_FIRST,
|
libnm: hide NMClient struct from public headers and use direct private field
Having the NMClient/NMClientClass structs in the public header allows
the user to subclass these types. Subclassing this type was never
intended, nor is it supported, nor does it seem useful. Subclassing only
makes sense if the type has suitable hooks to extend the type in a
meaningful way. NMClient hasn't, and everybody trying to derive from
this class would be better off delegating instead.
Also, having these structs in the public header prevents us from
embedding the private data in the object structure itself.
It thus has a runtime overhead and is less convenient for debugging (it's
hard to find the private data pointer in gdb).
Most importantly, there is no easy way to find the offset of the private
data fields, short of calling NM_CLIENT_GET_PRIVATE() -- which currently
is a macro. If we later want to generically look up the offset of the
private data, we would need NM_CLIENT_GET_PRIVATE() as a function.
Instead, by having an internally, statically known offset, we can use
that offset directly.
Also drop all signal hooks. They are also not useful.
This is an ABI and API change, but of something that we never wanted to
be part of the ABI/API, and which hopefully nobody is using.
2019-10-18 07:44:31 +02:00
|
|
|
0,
|
|
|
|
|
NULL,
|
|
|
|
|
NULL,
|
|
|
|
|
NULL,
|
2019-10-10 17:57:30 +02:00
|
|
|
G_TYPE_NONE,
|
|
|
|
|
1,
|
|
|
|
|
NM_TYPE_REMOTE_CONNECTION);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2016-07-07 11:35:19 +02:00
|
|
|
/**
|
|
|
|
|
* NMClient::active-connection-added:
|
2022-05-03 17:31:40 +02:00
|
|
|
* @client: the client that received the signal
|
2016-07-07 11:35:19 +02:00
|
|
|
* @active_connection: the new active connection
|
|
|
|
|
*
|
|
|
|
|
* Notifies that a #NMActiveConnection has been added.
|
|
|
|
|
**/
|
2019-10-10 17:57:30 +02:00
|
|
|
signals[ACTIVE_CONNECTION_ADDED] = g_signal_new(NM_CLIENT_ACTIVE_CONNECTION_ADDED,
|
|
|
|
|
G_OBJECT_CLASS_TYPE(object_class),
|
|
|
|
|
G_SIGNAL_RUN_FIRST,
|
|
|
|
|
0,
|
|
|
|
|
NULL,
|
|
|
|
|
NULL,
|
|
|
|
|
NULL,
|
|
|
|
|
G_TYPE_NONE,
|
|
|
|
|
1,
|
|
|
|
|
NM_TYPE_ACTIVE_CONNECTION);
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2016-07-07 11:35:19 +02:00
|
|
|
/**
|
|
|
|
|
* NMClient::active-connection-removed:
|
2022-05-03 17:31:40 +02:00
|
|
|
* @client: the client that received the signal
|
2016-07-07 11:35:19 +02:00
|
|
|
* @active_connection: the removed active connection
|
|
|
|
|
*
|
|
|
|
|
* Notifies that a #NMActiveConnection has been removed.
|
|
|
|
|
**/
|
2019-10-10 17:57:30 +02:00
|
|
|
signals[ACTIVE_CONNECTION_REMOVED] = g_signal_new(NM_CLIENT_ACTIVE_CONNECTION_REMOVED,
|
|
|
|
|
G_OBJECT_CLASS_TYPE(object_class),
|
|
|
|
|
G_SIGNAL_RUN_FIRST,
|
|
|
|
|
0,
|
|
|
|
|
NULL,
|
|
|
|
|
NULL,
|
|
|
|
|
NULL,
|
|
|
|
|
G_TYPE_NONE,
|
|
|
|
|
1,
|
|
|
|
|
NM_TYPE_ACTIVE_CONNECTION);
|
2014-07-24 08:53:33 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nm_client_initable_iface_init(GInitableIface *iface)
|
|
|
|
|
{
|
|
|
|
|
iface->init = init_sync;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nm_client_async_initable_iface_init(GAsyncInitableIface *iface)
|
|
|
|
|
{
|
|
|
|
|
iface->init_async = init_async;
|
|
|
|
|
iface->init_finish = init_finish;
|
|
|
|
|
}
|
2016-10-12 11:26:26 +02:00
|
|
|
|
libnm: add nm_client_wait_shutdown() function for cleaning up NMClient
Add a fire-and-forget function to wait for shutdown to be complete.
It's not entirely trivial to ensure all resources of NMClient are
cleaned up. That matters only if NMClient uses a temporary GMainContext
that the user wants to release while the application continues. For
example, to do some short-lived operations on a worker thread. It's
also not trivial because glib provides no convenient API to integrate
a GMainContext in another GMainContext. We have that code as
nm_utils_g_main_context_create_integrate_source(), so add a helper
function to allow the user to do this.
The function allows omitting the callback, in which case the caller
wouldn't know when shutdown is complete. That would still be useful,
however, when integrating the client's context into the caller's
context, so that the client's context gets automatically iterated
until completion.
The following test script will run out of file descriptors
when wait_shutdown() is not used:
#!/bin/python
import gi
gi.require_version("NM", "1.0")
from gi.repository import NM, GLib
for i in range(1200):
    print(f">>>{i}")
    ctx = GLib.MainContext()
    ctx.push_thread_default()
    nmc = NM.Client.new()
    ctx.pop_thread_default()
    def cb(unused, result, i):
        try:
            NM.Client.wait_shutdown_finish(result)
        except Exception:
            # cannot happen
            assert False
        else:
            print(f">>>>> {i} complete")
    nmc.wait_shutdown(True, None, cb, i)
    while GLib.MainContext.default().iteration(False):
        pass
2022-10-05 12:22:51 +02:00
|
|
|
/*****************************************************************************/
|
|
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
|
GCancellable *cancellable;
|
|
|
|
|
GSource *integration_source;
|
|
|
|
|
GTask *task;
|
|
|
|
|
GSource *idle_source;
|
|
|
|
|
|
|
|
|
|
/* A weakref to nm_client_get_context_busy_watcher() */
|
|
|
|
|
GWeakRef weak_ref;
|
|
|
|
|
|
|
|
|
|
gulong cancellable_id;
|
|
|
|
|
|
|
|
|
|
guint64 log_ptr;
|
|
|
|
|
|
|
|
|
|
int result;
|
|
|
|
|
} WaitShutdownData;
|
|
|
|
|
|
|
|
|
|
G_LOCK_DEFINE_STATIC(wait_shutdown_mutex);
|
|
|
|
|
|
|
|
|
|
static NM_CACHED_QUARK_FCN("nm.client.wait-shutdown", _wait_shutdown_get_quark);
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
_wait_shutdown_data_free(gpointer user_data)
|
|
|
|
|
{
|
|
|
|
|
WaitShutdownData *data = user_data;
|
|
|
|
|
|
|
|
|
|
nm_g_slice_free(data);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static gboolean
|
|
|
|
|
_wait_shutdown_idle_cb(gpointer user_data)
|
|
|
|
|
{
|
|
|
|
|
WaitShutdownData *data = user_data;
|
|
|
|
|
gs_unref_object GTask *task = NULL;
|
|
|
|
|
int result;
|
|
|
|
|
|
|
|
|
|
nm_clear_g_source_inst(&data->idle_source);
|
|
|
|
|
|
|
|
|
|
task = g_steal_pointer(&data->task);
|
|
|
|
|
|
|
|
|
|
result = g_atomic_int_get(&data->result);
|
|
|
|
|
nm_assert(NM_IN_SET(result, 0, 1));
|
|
|
|
|
|
|
|
|
|
NML_DBUS_LOG(_NML_NMCLIENT_LOG_LEVEL_COERCE(NML_DBUS_LOG_LEVEL_TRACE),
|
|
|
|
|
"nmclient[" NM_HASH_OBFUSCATE_PTR_FMT
|
|
|
|
|
"]: wait-shutdown (" NM_HASH_OBFUSCATE_PTR_FMT ")"
|
|
|
|
|
"%s",
|
|
|
|
|
data->log_ptr,
|
|
|
|
|
NM_HASH_OBFUSCATE_PTR(data),
|
|
|
|
|
!result ? " cancelled" : " completed");
|
|
|
|
|
|
|
|
|
|
if (!result) {
|
|
|
|
|
GError *error = NULL;
|
|
|
|
|
|
|
|
|
|
nm_utils_error_set_cancelled(&error, FALSE, NULL);
|
|
|
|
|
g_task_return_error(task, error);
|
|
|
|
|
} else
|
|
|
|
|
g_task_return_boolean(task, TRUE);
|
|
|
|
|
|
|
|
|
|
return G_SOURCE_CONTINUE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
_wait_shutdown_data_clear(WaitShutdownData *data, gboolean result)
|
|
|
|
|
{
|
|
|
|
|
gs_unref_object GObject *context_busy_watcher = NULL;
|
|
|
|
|
|
|
|
|
|
if (!g_atomic_int_compare_and_exchange(&data->result, -1, !!result)) {
|
|
|
|
|
/* There was a race and the result is already provided. Nothing to
|
|
|
|
|
* do, except, if "result" indicates cancellation (FALSE), set data->result
|
|
|
|
|
* to FALSE too. Aside from that, the completion is already in progress. */
|
|
|
|
|
if (!result)
|
|
|
|
|
g_atomic_int_compare_and_exchange(&data->result, TRUE, FALSE);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nm_clear_g_signal_handler(data->cancellable, &data->cancellable_id);
|
|
|
|
|
nm_clear_g_source_inst(&data->integration_source);
|
|
|
|
|
g_clear_object(&data->cancellable);
|
|
|
|
|
|
|
|
|
|
if (!result) {
|
|
|
|
|
/* This was a cancellation. We likely still have the qdata tracked.
|
|
|
|
|
* We need to remove it. */
|
|
|
|
|
|
|
|
|
|
context_busy_watcher = g_weak_ref_get(&data->weak_ref);
|
|
|
|
|
if (context_busy_watcher) {
|
|
|
|
|
GPtrArray *qdata_arr;
|
|
|
|
|
|
|
|
|
|
G_LOCK(wait_shutdown_mutex);
|
|
|
|
|
qdata_arr = g_object_get_qdata(context_busy_watcher, _wait_shutdown_get_quark());
|
|
|
|
|
if (qdata_arr && g_ptr_array_remove_fast(qdata_arr, data)) {
|
|
|
|
|
/* data->task had an additional reference, we return it now. */
|
|
|
|
|
g_object_unref(data->task);
|
|
|
|
|
}
|
|
|
|
|
G_UNLOCK(wait_shutdown_mutex);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
g_weak_ref_clear(&data->weak_ref);
|
|
|
|
|
|
|
|
|
|
/* We don't complete right away, instead always schedule an idle action
|
|
|
|
|
* on the caller's context. */
|
|
|
|
|
data->idle_source = nm_g_source_attach(
|
|
|
|
|
nm_g_idle_source_new(G_PRIORITY_DEFAULT_IDLE, _wait_shutdown_idle_cb, data, NULL),
|
|
|
|
|
g_task_get_context(data->task));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
_wait_shutdown_qdata_cb(gpointer user_data)
|
|
|
|
|
{
|
|
|
|
|
gs_unref_ptrarray GPtrArray *qdata_arr = user_data;
|
|
|
|
|
|
|
|
|
|
while (qdata_arr->len > 0) {
|
|
|
|
|
WaitShutdownData *data;
|
|
|
|
|
|
|
|
|
|
data = g_ptr_array_remove_index_fast(qdata_arr, qdata_arr->len - 1);
|
|
|
|
|
_wait_shutdown_data_clear(data, TRUE);
|
|
|
|
|
|
|
|
|
|
/* data->task had an additional reference, we return it now. */
|
|
|
|
|
g_object_unref(data->task);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
_wait_shutdown_cancelled_cb(GCancellable *cancellable, gpointer user_data)
|
|
|
|
|
{
|
|
|
|
|
_wait_shutdown_data_clear(g_task_get_task_data(user_data), FALSE);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* nm_client_wait_shutdown:
|
|
|
|
|
* @client: the #NMClient to shutdown.
|
|
|
|
|
* @integrate_maincontext: whether to hook the client's maincontext
|
|
|
|
|
* in the current thread default. Otherwise, you must ensure
|
|
|
|
|
* that the client's maincontext gets iterated so that it can complete.
|
|
|
|
|
* By integrating the maincontext in the current thread default, you
|
|
|
|
|
* may instead only iterate the latter.
|
2023-03-01 01:21:38 +01:00
|
|
|
* @cancellable: the #GCancellable to abort the shutdown.
|
|
|
|
|
* @callback: a #GAsyncReadyCallback to call when the request
|
libnm: add nm_client_wait_shutdown() function for cleaning up NMClient
Add a fire-and-forget function to wait for shutdown to be complete.
It's not entirely trivial to ensure all resources of NMClient are
cleaned up. That matters only if NMClient uses a temporary GMainContext
that the user wants to release while the application continues. For
example, to do some short-lived operations on a worker thread. It's
also not trivial because glib provides no convenient API to integrate
a GMainContext in another GMainContext. We have that code as
nm_utils_g_main_context_create_integrate_source(), so add a helper
function to allow the user to do this.
The function allows omitting the callback, in which case the caller
wouldn't know when shutdown is complete. That would still be useful,
however, when integrating the client's context into the caller's
context, so that the client's context gets automatically iterated
until completion.
The following test script will run out of file descriptors
when wait_shutdown() is not used:
#!/bin/python
import gi
gi.require_version("NM", "1.0")
from gi.repository import NM, GLib
for i in range(1200):
    print(f">>>{i}")
    ctx = GLib.MainContext()
    ctx.push_thread_default()
    nmc = NM.Client.new()
    ctx.pop_thread_default()
    def cb(unused, result, i):
        try:
            NM.Client.wait_shutdown_finish(result)
        except Exception:
            # cannot happen
            assert False
        else:
            print(f">>>>> {i} complete")
    nmc.wait_shutdown(True, None, cb, i)
    while GLib.MainContext.default().iteration(False):
        pass
2022-10-05 12:22:51 +02:00
|
|
|
* is satisfied or %NULL if you don't care about the result of the
|
|
|
|
|
* method invocation.
|
|
|
|
|
* @user_data: the data to pass to @callback
|
|
|
|
|
*
|
|
|
|
|
* The way to stop #NMClient is by unrefing it. That will cancel all
|
|
|
|
|
* internally pending async operations. However, as async operations in
|
|
|
|
|
* NMClient use GTask, they cannot complete right away. Instead,
|
|
|
|
|
* their (internal) result callback still needs to be dispatched by iterating
|
|
|
|
|
* the client's main context.
|
|
|
|
|
*
|
|
|
|
|
* You thus cannot stop iterating the client's main context until
|
|
|
|
|
* everything is wrapped up. nm_client_get_context_busy_watcher()
|
|
|
|
|
* helps to watch how long that will be.
|
|
|
|
|
*
|
|
|
|
|
* This function automates that waiting. Like all glib async operations
|
|
|
|
|
* this honors the current g_main_context_get_thread_default().
|
|
|
|
|
*
|
|
|
|
|
* In any case, to complete the shutdown, nm_client_get_main_context()
|
|
|
|
|
* must be iterated. If the current g_main_context_get_thread_default() is
|
|
|
|
|
* the same as nm_client_get_main_context(), then @integrate_maincontext
|
|
|
|
|
* is ignored. In that case, the caller is required to iterate the context
|
|
|
|
|
* for shutdown to complete. Otherwise, if g_main_context_get_thread_default()
|
|
|
|
|
* differs from nm_client_get_main_context() and @integrate_maincontext
|
|
|
|
|
* is %FALSE, the caller must make sure that both contexts are iterated
|
|
|
|
|
* until completion. Otherwise, if @integrate_maincontext is %TRUE, then
|
|
|
|
|
* nm_client_get_main_context() will be integrated in g_main_context_get_thread_default().
|
|
|
|
|
* This means the caller gives up nm_client_get_main_context() until the waiting
|
|
|
|
|
* completes; the function will acquire the context and hook it into
|
|
|
|
|
* g_main_context_get_thread_default().
|
|
|
|
|
* It is a bug to request @integrate_maincontext while having nm_client_get_main_context()
|
|
|
|
|
* acquired or otherwise iterated, because a context can only be acquired once
|
|
|
|
|
* at a time.
|
|
|
|
|
*
|
|
|
|
|
* Shutdown can only complete after all references to @client have been released.
|
|
|
|
|
*
|
|
|
|
|
* It is possible to call this function multiple times for the same client.
|
|
|
|
|
* But note that with @integrate_maincontext the client's context is acquired,
|
|
|
|
|
* which can only be done once at a time.
|
|
|
|
|
*
|
|
|
|
|
* It is permissible to start waiting before the object is fully initialized.
|
|
|
|
|
*
|
|
|
|
|
* The function really allows two separate things: to get a notification (callback) when
|
|
|
|
|
* shutdown is complete, and to integrate the client's context in another context.
|
|
|
|
|
* The latter case is useful if the client has a separate context and you hand it
|
|
|
|
|
* over to another GMainContext to wrap up.
|
|
|
|
|
*
|
|
|
|
|
* The main use is to have a NMClient and a separate GMainContext on a worker
|
|
|
|
|
* thread. When being done, you can hand over the cleanup of the context
|
|
|
|
|
* to g_main_context_default(), assuming that the main thread iterates
|
|
|
|
|
* the default context. In that case, you don't need to care about passing
|
|
|
|
|
* a callback to know when shutdown completed.
|
|
|
|
|
*
|
|
|
|
|
* Since: 1.42
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
nm_client_wait_shutdown(NMClient *client,
|
|
|
|
|
gboolean integrate_maincontext,
|
|
|
|
|
GCancellable *cancellable,
|
|
|
|
|
GAsyncReadyCallback callback,
|
|
|
|
|
gpointer user_data)
|
|
|
|
|
{
|
|
|
|
|
NMClientPrivate *priv;
|
|
|
|
|
WaitShutdownData *data;
|
|
|
|
|
gs_unref_object GTask *task = NULL;
|
|
|
|
|
GQuark quark = _wait_shutdown_get_quark();
|
|
|
|
|
GPtrArray *qdata_arr;
|
|
|
|
|
GSource *integration_source = NULL;
|
|
|
|
|
|
|
|
|
|
g_return_if_fail(NM_IS_CLIENT(client));
|
|
|
|
|
g_return_if_fail(!cancellable || G_IS_CANCELLABLE(cancellable));
|
|
|
|
|
|
|
|
|
|
priv = NM_CLIENT_GET_PRIVATE(client);
|
|
|
|
|
|
|
|
|
|
task = nm_g_task_new(NULL, cancellable, nm_client_wait_shutdown, callback, user_data);
|
|
|
|
|
|
|
|
|
|
if (integrate_maincontext && g_task_get_context(task) != priv->main_context) {
|
|
|
|
|
integration_source = nm_utils_g_main_context_create_integrate_source(priv->main_context);
|
|
|
|
|
g_return_if_fail(integration_source);
|
|
|
|
|
g_source_attach(integration_source, g_task_get_context(task));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
data = g_slice_new(WaitShutdownData);
|
2024-10-04 10:45:56 +02:00
|
|
|
*data = (WaitShutdownData) {
|
libnm: add nm_client_wait_shutdown() function for cleaning up NMClient
Add a fire-and-forget function to wait for shutdown to be complete.
It's not entirely trivial to ensure all resources of NMClient are
cleaned up. That matters only if NMClient uses a temporary GMainContext
that the user wants to release while the application continues. For
example, to do some short-lived operations on a worker thread. It's
also not trivial because glib provides no convenient API to integrate
a GMainContext in another GMainContext. We have that code as
nm_utils_g_main_context_create_integrate_source(), so add a helper
function to allow the user to do this.
The function allows omitting the callback, in which case the caller
wouldn't know when shutdown is complete. That would still be useful,
however, when integrating the client's context into the caller's
context, so that the client's context gets automatically iterated
until completion.
The following test script will run out of file descriptors
when wait_shutdown() is not used:
#!/bin/python
import gi
gi.require_version("NM", "1.0")
from gi.repository import NM, GLib
for i in range(1200):
    print(f">>>{i}")
    ctx = GLib.MainContext()
    ctx.push_thread_default()
    nmc = NM.Client.new()
    ctx.pop_thread_default()
    def cb(unused, result, i):
        try:
            NM.Client.wait_shutdown_finish(result)
        except Exception:
            # cannot happen
            assert False
        else:
            print(f">>>>> {i} complete")
    nmc.wait_shutdown(True, None, cb, i)
    while GLib.MainContext.default().iteration(False):
        pass
2022-10-05 12:22:51 +02:00
|
|
|
.cancellable = nm_g_object_ref(cancellable),
|
|
|
|
|
.task = g_object_ref(task),
|
|
|
|
|
.result = -1,
|
|
|
|
|
.integration_source = integration_source,
|
|
|
|
|
.log_ptr = NM_HASH_OBFUSCATE_PTR(client),
|
|
|
|
|
};
|
|
|
|
|
/* The "data" itself stays alive as long as the task lives. That's important, because
|
|
|
|
|
* the callbacks _wait_shutdown_qdata_cb and _wait_shutdown_cancelled_cb
|
|
|
|
|
* rely on accessing "data", which must live long enough. */
|
|
|
|
|
g_task_set_task_data(task, data, _wait_shutdown_data_free);
|
|
|
|
|
|
|
|
|
|
g_weak_ref_init(&data->weak_ref, priv->context_busy_watcher);
|
|
|
|
|
|
|
|
|
|
NML_DBUS_LOG(_NML_NMCLIENT_LOG_LEVEL_COERCE(NML_DBUS_LOG_LEVEL_TRACE),
|
|
|
|
|
"nmclient[" NM_HASH_OBFUSCATE_PTR_FMT
|
|
|
|
|
"]: wait-shutdown (" NM_HASH_OBFUSCATE_PTR_FMT ")"
|
|
|
|
|
"%s",
|
|
|
|
|
data->log_ptr,
|
|
|
|
|
NM_HASH_OBFUSCATE_PTR(data),
|
|
|
|
|
integration_source ? " (integrated main source)" : "");
|
|
|
|
|
|
|
|
|
|
/* I don't think g_object_weak_ref() + GWeakRef can actually be used in
|
|
|
|
|
* a race-free way here, because g_object_weak_ref() has no GDestroyNotify
|
|
|
|
|
* and cannot keep task alive to avoid races. Instead, implement a weak pointer
|
|
|
|
|
* notification via the qdata.
|
|
|
|
|
*
|
|
|
|
|
* Yes, getting cancellation thread-safe is rather complicated here! I think
|
|
|
|
|
* the code is correct though, and you can cancel the operation from
|
|
|
|
|
* any thread without races. */
|
|
|
|
|
G_LOCK(wait_shutdown_mutex);
|
|
|
|
|
qdata_arr = g_object_get_qdata(priv->context_busy_watcher, quark);
|
|
|
|
|
if (!qdata_arr) {
|
|
|
|
|
qdata_arr = g_ptr_array_new();
|
|
|
|
|
g_object_set_qdata_full(priv->context_busy_watcher,
|
|
|
|
|
quark,
|
|
|
|
|
qdata_arr,
|
|
|
|
|
_wait_shutdown_qdata_cb);
|
|
|
|
|
}
|
|
|
|
|
/* data->task gets an additional reference, take it now. */
|
|
|
|
|
g_object_ref(data->task);
|
|
|
|
|
g_ptr_array_add(qdata_arr, data);
|
|
|
|
|
G_UNLOCK(wait_shutdown_mutex);
|
|
|
|
|
|
|
|
|
|
if (data->cancellable) {
|
|
|
|
|
/* Take an additional reference on task. */
|
|
|
|
|
data->cancellable_id = g_cancellable_connect(data->cancellable,
|
|
|
|
|
G_CALLBACK(_wait_shutdown_cancelled_cb),
|
|
|
|
|
g_object_ref(task),
|
|
|
|
|
g_object_unref);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* nm_client_wait_shutdown_finish:
|
|
|
|
|
* @result: a #GAsyncResult obtained from the #GAsyncReadyCallback passed to nm_client_wait_shutdown()
|
|
|
|
|
* @error: return location for error or %NULL
|
|
|
|
|
*
|
|
|
|
|
* Returns: %TRUE if waiting completed successfully. In that case, all resources of the
|
|
|
|
|
* #NMClient are wrapped up and released. This can only fail through user cancellation.
|
|
|
|
|
*
|
2022-11-07 21:13:28 +01:00
|
|
|
* Since: 1.42
|
libnm: add nm_client_wait_shutdown() function for cleaning up NMClient
Add a fire-and-forget function to wait for shutdown to be complete.
It's not entirely trivial to ensure all resources of NMClient are
cleaned up. That matters only if NMClient uses a temporary GMainContext
that the user wants to release while the application continues. For
example, to do some short-lived operations on a worker thread. It's
also not trivial because glib provides no convenient API to integrate
a GMainContext in another GMainContext. We have that code as
nm_utils_g_main_context_create_integrate_source(), so add a helper
function to allow the user to do this.
The function allows omitting the callback, in which case the caller
wouldn't know when shutdown is complete. That would still be useful,
however, when integrating the client's context into the caller's
context, so that the client's context gets automatically iterated
until completion.
The following test script will run out of file descriptors
when wait_shutdown() is not used:
#!/bin/python
import gi
gi.require_version("NM", "1.0")
from gi.repository import NM, GLib
for i in range(1200):
    print(f">>>{i}")
    ctx = GLib.MainContext()
    ctx.push_thread_default()
    nmc = NM.Client.new()
    ctx.pop_thread_default()
    def cb(unused, result, i):
        try:
            NM.Client.wait_shutdown_finish(result)
        except Exception:
            # cannot happen
            assert False
        else:
            print(f">>>>> {i} complete")
    nmc.wait_shutdown(True, None, cb, i)
    while GLib.MainContext.default().iteration(False):
        pass
2022-10-05 12:22:51 +02:00
|
|
|
*/
|
|
|
|
|
gboolean
|
|
|
|
|
nm_client_wait_shutdown_finish(GAsyncResult *result, GError **error)
|
|
|
|
|
{
|
|
|
|
|
g_return_val_if_fail(nm_g_task_is_valid(result, NULL, nm_client_wait_shutdown), FALSE);
|
|
|
|
|
|
|
|
|
|
return g_task_propagate_boolean(G_TASK(result), error);
|
|
|
|
|
}
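/* Usage sketch (illustration, not part of this file; function names are
 * hypothetical): a client was created with its own thread-default
 * GMainContext on a worker thread; when done, the wrap-up is handed over
 * to the caller's (here: the default) context. */
static void
example_wait_done_cb(GObject *source, GAsyncResult *result, gpointer user_data)
{
    GError *error = NULL;

    if (!nm_client_wait_shutdown_finish(result, &error)) {
        /* Can only fail through cancellation of the wait itself. */
        g_print("wait-shutdown cancelled: %s\n", error->message);
        g_clear_error(&error);
    }
}

static void
example_release_client(NMClient *client)
{
    /* Called with g_main_context_default() as the thread default:
     * integrate the client's context so it gets iterated to completion. */
    nm_client_wait_shutdown(client, TRUE, NULL, example_wait_done_cb, NULL);
    g_object_unref(client);
}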
|
|
|
|
|
|
2016-10-12 11:26:26 +02:00
|
|
|
/*****************************************************************************
|
|
|
|
|
* Backported symbols. Usually, new API is only added in new major versions
|
2024-05-27 17:58:08 +02:00
|
|
|
* of NetworkManager (that is, on the "main" branch). Sometimes, however, we might
|
2016-10-12 11:26:26 +02:00
|
|
|
* have to backport some API to an older stable branch. In that case, we backport
|
|
|
|
|
* the symbols with a different version corresponding to the minor API.
|
|
|
|
|
*
|
2024-05-27 17:58:08 +02:00
|
|
|
* To allow upgrading from such an extended minor release, "main" contains these
|
2016-10-12 11:26:26 +02:00
|
|
|
* backported symbols too.
|
|
|
|
|
*
|
|
|
|
|
* For example, 1.2.0 added nm_setting_connection_autoconnect_slaves_get_type.
|
|
|
|
|
* This was backported for 1.0.4 as nm_setting_connection_autoconnect_slaves_get_type@libnm_1_0_4
|
|
|
|
|
* To allow an application that was linked against 1.0.4 to seamlessly upgrade to
|
2024-05-27 17:58:08 +02:00
|
|
|
* a newer major version, the same symbol is also exposed on "main". Note that
|
2016-10-12 11:26:26 +02:00
|
|
|
* a user can only seamlessly upgrade to a newer major version that is released
|
|
|
|
|
* *after* 1.0.4 is out. In this example, 1.2.0 was released after 1.0.4, and thus
|
|
|
|
|
* a 1.0.4 user can upgrade to 1.2.0 ABI.
|
|
|
|
|
*****************************************************************************/
|
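/* Generic sketch (not the actual NM_BACKPORT_SYMBOL expansion) of how ELF
 * symbol versioning can expose a backported symbol under an older version
 * node, using the GNU .symver directive. Names below are hypothetical and
 * the block is disabled; it only illustrates the mechanism. */
#if 0
int
_nm_example_func_1_0_4(void)
{
    return 42;
}
__asm__(".symver _nm_example_func_1_0_4, nm_example_func@libnm_1_0_4");
#endif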
|
|
|
|
|
|
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_0_4,
|
|
|
|
|
NMSettingConnectionAutoconnectSlaves,
|
|
|
|
|
nm_setting_connection_get_autoconnect_slaves,
|
|
|
|
|
(NMSettingConnection * setting),
|
|
|
|
|
(setting));
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2016-10-12 11:26:26 +02:00
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_0_4,
|
|
|
|
|
GType,
|
|
|
|
|
nm_setting_connection_autoconnect_slaves_get_type,
|
|
|
|
|
(void),
|
|
|
|
|
());
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2016-10-12 11:26:26 +02:00
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_0_6,
|
|
|
|
|
NMMetered,
|
|
|
|
|
nm_setting_connection_get_metered,
|
|
|
|
|
(NMSettingConnection * setting),
|
|
|
|
|
(setting));
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2016-10-12 11:26:26 +02:00
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_0_6, GType, nm_metered_get_type, (void), ());
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2016-10-12 11:26:26 +02:00
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_0_6,
|
|
|
|
|
NMSettingWiredWakeOnLan,
|
|
|
|
|
nm_setting_wired_get_wake_on_lan,
|
|
|
|
|
(NMSettingWired * setting),
|
|
|
|
|
(setting));
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2016-10-12 11:26:26 +02:00
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_0_6,
|
|
|
|
|
const char *,
|
|
|
|
|
nm_setting_wired_get_wake_on_lan_password,
|
|
|
|
|
(NMSettingWired * setting),
|
|
|
|
|
(setting));
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2016-10-12 11:26:26 +02:00
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_0_6, GType, nm_setting_wired_wake_on_lan_get_type, (void), ());
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2016-10-12 11:26:26 +02:00
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_0_6, const guint *, nm_utils_wifi_2ghz_freqs, (void), ());
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2016-10-12 11:26:26 +02:00
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_0_6, const guint *, nm_utils_wifi_5ghz_freqs, (void), ());
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2016-10-12 11:26:26 +02:00
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_0_6,
|
|
|
|
|
char *,
|
|
|
|
|
nm_utils_enum_to_str,
|
|
|
|
|
(GType type, int value),
|
|
|
|
|
(type, value));
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2016-10-12 11:26:26 +02:00
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_0_6,
|
|
|
|
|
gboolean,
|
|
|
|
|
nm_utils_enum_from_str,
|
|
|
|
|
(GType type, const char *str, int *out_value, char **err_token),
|
|
|
|
|
(type, str, out_value, err_token));
|
2020-09-28 16:03:33 +02:00
|
|
|
|
all: don't use gchar/gshort/gint/glong but C types
We commonly don't use the glib typedefs for char/short/int/long,
but their C types directly.
$ git grep '\<g\(char\|short\|int\|long\|float\|double\)\>' | wc -l
587
$ git grep '\<\(char\|short\|int\|long\|float\|double\)\>' | wc -l
21114
One could argue that using the glib typedefs is preferable in
public API (of our glib based libnm library) or where it clearly
is related to glib, like during
g_object_set (obj, PROPERTY, (gint) value, NULL);
However, that argument does not seem strong, because in practice we don't
follow that argument today, and seldomly use the glib typedefs.
Also, the style guide for this would be hard to formalize, because
"using them where clearly related to a glib" is a very loose suggestion.
Also note that glib typedefs will always just be typedefs of the
underlying C types. There is no danger of glib changing the meaning
of these typedefs (because that would be a major API break of glib).
A simple style guide is instead: don't use these typedefs.
No manual actions, I only ran the bash script:
FILES=($(git ls-files '*.[hc]'))
sed -i \
-e 's/\<g\(char\|short\|int\|long\|float\|double\)\>\( [^ ]\)/\1\2/g' \
-e 's/\<g\(char\|short\|int\|long\|float\|double\)\> /\1 /g' \
-e 's/\<g\(char\|short\|int\|long\|float\|double\)\>/\1/g' \
"${FILES[@]}"
2018-07-11 07:40:19 +02:00
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_2_4,
|
|
|
|
|
int,
|
|
|
|
|
nm_setting_ip_config_get_dns_priority,
|
|
|
|
|
(NMSettingIPConfig * setting),
|
|
|
|
|
(setting));
|
2020-09-28 16:03:33 +02:00
|
|
|
|
2018-10-19 19:12:53 +02:00
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_10_14,
|
|
|
|
|
NMSettingConnectionMdns,
|
|
|
|
|
nm_setting_connection_get_mdns,
|
|
|
|
|
(NMSettingConnection * setting),
|
|
|
|
|
(setting));
|
|
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_10_14, GType, nm_setting_connection_mdns_get_type, (void), ());
|
2021-09-23 08:56:08 +02:00
|
|
|
|
|
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_30_8,
|
|
|
|
|
int,
|
|
|
|
|
nm_setting_ip_config_get_required_timeout,
|
|
|
|
|
(NMSettingIPConfig * setting),
|
|
|
|
|
(setting));
|
2021-10-05 10:38:35 +02:00
|
|
|
|
|
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_30_8,
|
|
|
|
|
NMIPAddress *,
|
|
|
|
|
nm_ip_address_dup,
|
|
|
|
|
(NMIPAddress * address),
|
|
|
|
|
(address));
|
|
|
|
|
|
|
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_30_8, NMIPRoute *, nm_ip_route_dup, (NMIPRoute * route), (route));
|
2025-04-27 22:30:16 -04:00
|
|
|
|
2025-04-28 15:20:20 -04:00
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_46_8,
|
|
|
|
|
gboolean,
|
|
|
|
|
nm_ethtool_optname_is_fec,
|
|
|
|
|
(const char *optname),
|
|
|
|
|
(optname));
|
|
|
|
|
|
|
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_46_8, GType, nm_setting_ethtool_fec_mode_get_type, (void), ());
|
|
|
|
|
|
2025-04-28 15:07:49 -04:00
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_48_18,
|
|
|
|
|
gboolean,
|
|
|
|
|
nm_ethtool_optname_is_fec,
|
|
|
|
|
(const char *optname),
|
|
|
|
|
(optname));
|
|
|
|
|
|
|
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_48_18, GType, nm_setting_ethtool_fec_mode_get_type, (void), ());
|
|
|
|
|
|
2025-04-27 22:30:16 -04:00
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_50_4,
|
|
|
|
|
gboolean,
|
|
|
|
|
nm_ethtool_optname_is_fec,
|
|
|
|
|
(const char *optname),
|
|
|
|
|
(optname));
|
|
|
|
|
|
|
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_50_4, GType, nm_setting_ethtool_fec_mode_get_type, (void), ());
|
2026-01-20 11:43:05 +01:00
|
|
|
|
|
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_52_2,
|
|
|
|
|
char *,
|
|
|
|
|
nm_utils_copy_cert_as_user,
|
|
|
|
|
(const char *filename, const char *user, GError **error),
|
|
|
|
|
(filename, user, error));
|
|
|
|
|
|
|
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_52_2,
|
|
|
|
|
gboolean,
|
|
|
|
|
nm_vpn_plugin_info_supports_safe_private_file_access,
|
|
|
|
|
(NMVpnPluginInfo * self),
|
|
|
|
|
(self));
|
2026-01-20 11:57:30 +01:00
|
|
|
|
|
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_54_3,
|
|
|
|
|
char *,
|
|
|
|
|
nm_utils_copy_cert_as_user,
|
|
|
|
|
(const char *filename, const char *user, GError **error),
|
|
|
|
|
(filename, user, error));
|
|
|
|
|
|
|
|
|
|
NM_BACKPORT_SYMBOL(libnm_1_54_3,
|
|
|
|
|
gboolean,
|
|
|
|
|
nm_vpn_plugin_info_supports_safe_private_file_access,
|
|
|
|
|
(NMVpnPluginInfo * self),
|
|
|
|
|
(self));
|